func Start(mt *Mysqld, mysqlWaitTime time.Duration) error { var name string // try the mysqld start hook, if any h := hook.NewSimpleHook("mysqld_start") hr := h.Execute() switch hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going name = "mysqld_start hook" case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, run mysqld_safe ourselves log.Infof("No mysqld_start hook, running mysqld_safe directly") dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name = path.Join(dir, "bin/mysqld_safe") arg := []string{ "--defaults-file=" + mt.config.path} env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")} cmd := exec.Command(name, arg...) cmd.Dir = dir cmd.Env = env log.Infof("mysqlctl.Start mysqlWaitTime:%v %#v", mysqlWaitTime, cmd) _, err = cmd.StderrPipe() if err != nil { return nil } err = cmd.Start() if err != nil { return nil } // wait so we don't get a bunch of defunct processes go cmd.Wait() default: // hook failed, we report error return fmt.Errorf("mysqld_start hook failed: %v", hr.String()) } // give it some time to succeed - usually by the time the socket emerges // we are in good shape for i := mysqlWaitTime; i >= 0; i -= time.Second { _, statErr := os.Stat(mt.config.SocketFile) if statErr == nil { // Make sure the socket file isn't stale. conn, connErr := mt.createConnection() if connErr == nil { conn.Close() return nil } } else if !os.IsNotExist(statErr) { return statErr } time.Sleep(time.Second) } return errors.New(name + ": deadline exceeded waiting for " + mt.config.SocketFile) }
func (mysqld *Mysqld) ValidateCloneTarget(hookExtraEnv map[string]string) error { // run a hook to check local things h := hook.NewSimpleHook("preflight_restore") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } qr, err := mysqld.fetchSuperQuery("SHOW DATABASES") if err != nil { return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err) } for _, row := range qr.Rows { if strings.HasPrefix(row[0].String(), "vt_") { dbName := row[0].String() tableQr, err := mysqld.fetchSuperQuery("SHOW TABLES FROM " + dbName) if err != nil { return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err) } else if len(tableQr.Rows) == 0 { // no tables == empty db, all is well continue } return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, found active db %v", dbName) } } return nil }
func (mysqld *Mysqld) initConfig(root string) error { var err error var configData string switch hr := hook.NewSimpleHook("make_mycnf").Execute(); hr.ExitStatus { case hook.HOOK_DOES_NOT_EXIST: log.Infof("make_mycnf hook doesn't exist, reading default template files") cnfTemplatePaths := []string{ path.Join(root, "config/mycnf/default.cnf"), path.Join(root, "config/mycnf/master.cnf"), path.Join(root, "config/mycnf/replica.cnf"), } if extraCnf := os.Getenv("EXTRA_MY_CNF"); extraCnf != "" { parts := strings.Split(extraCnf, ":") cnfTemplatePaths = append(cnfTemplatePaths, parts...) } configData, err = mysqld.config.makeMycnf(cnfTemplatePaths) case hook.HOOK_SUCCESS: configData, err = mysqld.config.fillMycnfTemplate(hr.Stdout) default: return fmt.Errorf("make_mycnf hook failed(%v): %v", hr.ExitStatus, hr.Stderr) } if err != nil { return err } return ioutil.WriteFile(mysqld.config.path, []byte(configData), 0664) }
func (wr *Wrangler) checkMasterElect(ti *topo.TabletInfo) error { // Check the master-elect is fit for duty - call out for hardware checks. // if the server was already serving live traffic, it's probably good if ti.IsInServingGraph() { return nil } return wr.ExecuteOptionalTabletInfoHook(ti, hook.NewSimpleHook("preflight_serving_type")) }
// Make this external, since these transitions need to be forced from time to time. func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, runHooks bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) { return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias) } if runHooks { // Only run the preflight_serving_type hook when // transitioning from non-serving to serving. if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) { if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil { return err } } } tablet.Type = newType if newType == topo.TYPE_IDLE { if tablet.Parent.IsZero() { si, err := ts.GetShard(tablet.Keyspace, tablet.Shard) if err != nil { return err } rec := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, cell := range si.Cells { wg.Add(1) go func(cell string) { defer wg.Done() sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell) return } for _, rl := range sri.ReplicationLinks { if rl.Parent == tabletAlias { rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell)) } } }(cell) } wg.Wait() if rec.HasErrors() { return rec.Error() } } tablet.Parent = topo.TabletAlias{} tablet.Keyspace = "" tablet.Shard = "" tablet.KeyRange = key.KeyRange{} } return topo.UpdateTablet(ts, tablet) }
// StopSlave stops a slave on the provided MysqldDaemon func StopSlave(md MysqlDaemon, hookExtraEnv map[string]string) error { h := hook.NewSimpleHook("preflight_stop_slave") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } return md.ExecuteSuperQueryList([]string{SQLStopSlave}) }
// StartSlave starts a slave on the provided MysqldDaemon func StartSlave(md MysqlDaemon, hookExtraEnv map[string]string) error { if err := md.ExecuteSuperQueryList([]string{SQLStartSlave}); err != nil { return err } h := hook.NewSimpleHook("postflight_start_slave") h.ExtraEnv = hookExtraEnv return h.ExecuteOptional() }
func (mysqld *Mysqld) StopSlave(hookExtraEnv map[string]string) error { h := hook.NewSimpleHook("preflight_stop_slave") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } return mysqld.executeSuperQuery("SLAVE STOP") }
func (mysqld *Mysqld) StartSlave(hookExtraEnv map[string]string) error { if err := mysqld.executeSuperQuery("SLAVE START"); err != nil { return err } h := hook.NewSimpleHook("postflight_start_slave") h.ExtraEnv = hookExtraEnv return h.ExecuteOptional() }
// Scrap will update the tablet type to 'Scrap', and remove it from // the serving graph. // // 'force' means we are not on the tablet being scrapped, so it is // probably dead. So if 'force' is true, we will also remove pending // remote actions. And if 'force' is false, we also run an optional // hook. func Scrap(ts topo.Server, tabletAlias topo.TabletAlias, force bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } // If you are already scrap, skip updating replication data. It won't // be there anyway. wasAssigned := tablet.IsAssigned() tablet.Type = topo.TYPE_SCRAP tablet.Parent = topo.TabletAlias{} // Update the tablet first, since that is canonical. err = topo.UpdateTablet(ts, tablet) if err != nil { return err } // Remove any pending actions. Presumably forcing a scrap // means you don't want the agent doing anything and the // machine requires manual attention. if force { err := ts.PurgeTabletActions(tabletAlias, actionnode.ActionNodeCanBePurged) if err != nil { log.Warningf("purge actions failed: %v", err) } } if wasAssigned { err = topo.DeleteTabletReplicationData(ts, tablet.Tablet) if err != nil { if err == topo.ErrNoNode { log.V(6).Infof("no ShardReplication object for cell %v", tablet.Alias.Cell) err = nil } if err != nil { log.Warningf("remove replication data for %v failed: %v", tablet.Alias, err) } } } // run a hook for final cleanup, only in non-force mode. // (force mode executes on the vtctl side, not on the vttablet side) if !force { hk := hook.NewSimpleHook("postflight_scrap") ConfigureTabletHook(hk, tablet.Alias) if hookErr := hk.ExecuteOptional(); hookErr != nil { // we don't want to return an error, the server // is already in bad shape probably. log.Warningf("Scrap: postflight_scrap failed: %v", hookErr) } } return nil }
// Shutdown will stop the mysqld daemon that is running in the background. // // waitForMysqld: should the function block until mysqld has stopped? // This can actually take a *long* time if the buffer cache needs to be fully // flushed - on the order of 20-30 minutes. func (mysqld *Mysqld) Shutdown(waitForMysqld bool, mysqlWaitTime time.Duration) error { log.Infof("mysqlctl.Shutdown") // possibly mysql is already shutdown, check for a few files first _, socketPathErr := os.Stat(mysqld.config.SocketFile) _, pidPathErr := os.Stat(mysqld.config.PidFile) if socketPathErr != nil && pidPathErr != nil { log.Warningf("assuming shutdown - no socket, no pid file") return nil } // try the mysqld shutdown hook, if any h := hook.NewSimpleHook("mysqld_shutdown") hr := h.Execute() switch hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, try mysqladmin log.Infof("No mysqld_shutdown hook, running mysqladmin directly") dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name := path.Join(dir, "bin/mysqladmin") arg := []string{ "-u", "vt_dba", "-S", mysqld.config.SocketFile, "shutdown"} env := []string{ os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql"), } _, err = execCmd(name, arg, env, dir) if err != nil { return err } default: // hook failed, we report error return fmt.Errorf("mysqld_shutdown hook failed: %v", hr.String()) } // wait for mysqld to really stop. use the sock file as a proxy for that since // we can't call wait() in a process we didn't start. if waitForMysqld { for i := mysqlWaitTime; i >= 0; i -= time.Second { _, statErr := os.Stat(mysqld.config.SocketFile) if statErr != nil && os.IsNotExist(statErr) { return nil } log.Infof("Mysqld.Shutdown: sleeping for 1s waiting for socket file %v", mysqld.config.SocketFile) time.Sleep(time.Second) } return errors.New("gave up waiting for mysqld to stop") } return nil }
// This piece runs on the presumably empty machine acting as the target in the // create replica action. // // validate target (self) // shutdown_mysql() // create temp data directory /vt/target/vt_<keyspace> // copy compressed data files via HTTP // verify hash of compressed files // uncompress into /vt/vt_<target-uid>/data/vt_<keyspace> // start_mysql() // clean up compressed files func (mysqld *Mysqld) RestoreFromSnapshot(snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error { if snapshotManifest == nil { return errors.New("RestoreFromSnapshot: nil snapshotManifest") } relog.Debug("ValidateCloneTarget") if err := mysqld.ValidateCloneTarget(hookExtraEnv); err != nil { return err } relog.Debug("Shutdown mysqld") if err := Shutdown(mysqld, true, MysqlWaitTime); err != nil { return err } relog.Debug("Fetch snapshot") if err := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); err != nil { return err } relog.Debug("Restart mysqld") if err := Start(mysqld, MysqlWaitTime); err != nil { return err } cmdList, err := StartReplicationCommands(mysqld, snapshotManifest.ReplicationState) if err != nil { return err } if err := mysqld.executeSuperQueryList(cmdList); err != nil { return err } if !dontWaitForSlaveStart { if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil { return err } } h := hook.NewSimpleHook("postflight_restore") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } return nil }
// Scrap will update the tablet type to 'Scrap', and remove it from // the serving graph. // // 'force' means we are not on the tablet being scrapped, so it is // probably dead. So if 'force' is true, we will also remove pending // remote actions. And if 'force' is false, we also run an optional // hook. func Scrap(ctx context.Context, ts topo.Server, tabletAlias topo.TabletAlias, force bool) error { tablet, err := ts.GetTablet(ctx, tabletAlias) if err != nil { return err } // If you are already scrap, skip updating replication data. It won't // be there anyway. wasAssigned := tablet.IsAssigned() tablet.Type = topo.TYPE_SCRAP // Update the tablet first, since that is canonical. err = topo.UpdateTablet(ctx, ts, tablet) if err != nil { return err } if wasAssigned { err = topo.DeleteTabletReplicationData(ctx, ts, tablet.Tablet) if err != nil { if err == topo.ErrNoNode { log.V(6).Infof("no ShardReplication object for cell %v", tablet.Alias.Cell) err = nil } if err != nil { log.Warningf("remove replication data for %v failed: %v", tablet.Alias, err) } } } // run a hook for final cleanup, only in non-force mode. // (force mode executes on the vtctl side, not on the vttablet side) if !force { hk := hook.NewSimpleHook("postflight_scrap") ConfigureTabletHook(hk, tablet.Alias) if hookErr := hk.ExecuteOptional(); hookErr != nil { // we don't want to return an error, the server // is already in bad shape probably. log.Warningf("Scrap: postflight_scrap failed: %v", hookErr) } } return nil }
// Make this external, since these transitions need to be forced from time to time. func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, runHooks bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) { return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias) } if runHooks { // Only run the preflight_serving_type hook when // transitioning from non-serving to serving. if !topo.IsServingType(tablet.Type) && topo.IsServingType(newType) { if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil { return err } } } tablet.Type = newType if newType == topo.TYPE_IDLE { if tablet.Parent.Uid == topo.NO_TABLET { // With a master the node cannot be set to idle unless we have already removed all of // the derived paths. The global replication path is a good indication that this has // been resolved. children, err := ts.GetReplicationPaths(tablet.Keyspace, tablet.Shard, tablet.ReplicationPath()) if err != nil && err != topo.ErrNoNode { return err } if err == nil && len(children) > 0 { return fmt.Errorf("cannot change tablet type %v -> %v - reparent action has not finished %v", tablet.Type, newType, tabletAlias) } } tablet.Parent = topo.TabletAlias{} tablet.Keyspace = "" tablet.Shard = "" tablet.KeyRange = key.KeyRange{} } return topo.UpdateTablet(ts, tablet) }
// change a tablet type to RESTORE and set all the other arguments. // from now on, we can go to: // - back to IDLE if we don't use the tablet at all (after for instance // a successful ReserveForRestore but a failed Snapshot) // - to SCRAP if something in the process on the target host fails // - to SPARE if the clone works func (ta *TabletActor) changeTypeToRestore(tablet, sourceTablet *topo.TabletInfo, parentAlias topo.TabletAlias, keyRange key.KeyRange) error { // run the optional preflight_assigned hook hk := hook.NewSimpleHook("preflight_assigned") configureTabletHook(hk, ta.tabletAlias) if err := hk.ExecuteOptional(); err != nil { return err } // change the type tablet.Parent = parentAlias tablet.Keyspace = sourceTablet.Keyspace tablet.Shard = sourceTablet.Shard tablet.Type = topo.TYPE_RESTORE tablet.KeyRange = keyRange tablet.DbNameOverride = sourceTablet.DbNameOverride if err := topo.UpdateTablet(ta.ts, tablet); err != nil { return err } // and create the replication graph items return topo.CreateTabletReplicationPaths(ta.ts, tablet.Tablet) }
// Make this external, since in needs to be forced from time to time. func Scrap(ts topo.Server, tabletAlias topo.TabletAlias, force bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } // If you are already scrap, skip deleting the path. It won't // be correct since the Parent will be cleared already. wasAssigned := tablet.IsAssigned() replicationPath := "" if wasAssigned { replicationPath = tablet.ReplicationPath() } tablet.Type = topo.TYPE_SCRAP tablet.Parent = topo.TabletAlias{} // Update the tablet first, since that is canonical. err = topo.UpdateTablet(ts, tablet) if err != nil { return err } // Remove any pending actions. Presumably forcing a scrap means you don't // want the agent doing anything and the machine requires manual attention. if force { err := ts.PurgeTabletActions(tabletAlias, ActionNodeCanBePurged) if err != nil { log.Warningf("purge actions failed: %v", err) } } if wasAssigned { err = ts.DeleteReplicationPath(tablet.Keyspace, tablet.Shard, replicationPath) if err != nil { switch err { case topo.ErrNoNode: log.V(6).Infof("no replication path: %v", replicationPath) err = nil case topo.ErrNotEmpty: // If you are forcing the scrapping of a master, you can't update the // replication graph yet, since other nodes are still under the impression // they are slaved to this tablet. // If the node was not empty, we can't do anything about it - the replication // graph needs to be fixed by reparenting. If the action was forced, assume // the user knows best and squelch the error. if tablet.Parent.Uid == topo.NO_TABLET && force { err = nil } } if err != nil { log.Warningf("remove replication path failed: %v %v", replicationPath, err) } } } // run a hook for final cleanup, only in non-force mode. // (force mode executes on the vtctl side, not on the vttablet side) if !force { hk := hook.NewSimpleHook("postflight_scrap") configureTabletHook(hk, tablet.Alias()) if hookErr := hk.ExecuteOptional(); hookErr != nil { // we don't want to return an error, the server // is already in bad shape probably. log.Warningf("Scrap: postflight_scrap failed: %v", hookErr) } } return nil }
// CreateMultiSnapshot create snapshots of the data. // - for a resharding snapshot, keyRanges+keyName+keyType are set, // and tables is empty. This action will create multiple snapshots, // one per keyRange. // - for a vertical split, tables is set, keyRanges = [KeyRange{}] and // keyName+keyType are empty. It will create a single snapshot of // the contents of the tables. // Note combinations of table subset and keyranges are not supported. func (mysqld *Mysqld) CreateMultiSnapshot(keyRanges []key.KeyRange, dbName, keyName string, keyType key.KeyspaceIdType, sourceAddr string, allowHierarchicalReplication bool, snapshotConcurrency int, tables []string, skipSlaveRestart bool, maximumFilesize uint64, hookExtraEnv map[string]string) (snapshotManifestFilenames []string, err error) { if dbName == "" { err = fmt.Errorf("no database name provided") return } if len(tables) > 0 { if len(keyRanges) != 1 || keyRanges[0].IsPartial() { return nil, fmt.Errorf("With tables specified, can only have one full KeyRange") } } // same logic applies here log.Infof("validateCloneSource") if err = mysqld.validateCloneSource(false, hookExtraEnv); err != nil { return } // clean out and start fresh cloneSourcePaths := make(map[key.KeyRange]string) for _, keyRange := range keyRanges { cloneSourcePaths[keyRange] = path.Join(mysqld.SnapshotDir, dataDir, dbName+"-"+string(keyRange.Start.Hex())+","+string(keyRange.End.Hex())) } for _, _path := range cloneSourcePaths { if err = os.RemoveAll(_path); err != nil { return } if err = os.MkdirAll(_path, 0775); err != nil { return } } mainCloneSourcePath := path.Join(mysqld.SnapshotDir, dataDir, dbName+"-all") if err = os.RemoveAll(mainCloneSourcePath); err != nil { return } if err = os.MkdirAll(mainCloneSourcePath, 0775); err != nil { return } // get the schema for each table sd, fetchErr := mysqld.GetSchema(dbName, tables, true) if fetchErr != nil { return []string{}, fetchErr } if len(sd.TableDefinitions) == 0 { return []string{}, fmt.Errorf("empty table list for %v", dbName) } sd.SortByReverseDataLength() // prepareToSnapshot will get the tablet in the rigth state, // and return the current mysql status. slaveStartRequired, readOnly, replicationPosition, myMasterPosition, masterAddr, err := mysqld.prepareToSnapshot(allowHierarchicalReplication, hookExtraEnv) if err != nil { return } if skipSlaveRestart { if slaveStartRequired { log.Infof("Overriding slaveStartRequired to false") } slaveStartRequired = false } defer func() { err = replaceError(err, mysqld.restoreAfterSnapshot(slaveStartRequired, readOnly, hookExtraEnv)) }() // dump the files in parallel with a pre-defined concurrency datafiles := make([]map[key.KeyRange][]SnapshotFile, len(sd.TableDefinitions)) dumpTableWorker := func(i int) (err error) { table := sd.TableDefinitions[i] if table.Type != proto.TABLE_BASE_TABLE { // we just skip views here return nil } if len(tables) > 0 { sfs, err := mysqld.dumpTableFull(table, dbName, mainCloneSourcePath, cloneSourcePaths[key.KeyRange{}], maximumFilesize) if err != nil { return err } datafiles[i] = map[key.KeyRange][]SnapshotFile{ key.KeyRange{}: sfs, } } else { datafiles[i], err = mysqld.dumpTableSplit(table, dbName, keyName, keyType, mainCloneSourcePath, cloneSourcePaths, maximumFilesize) } return } if err = ConcurrentMap(snapshotConcurrency, len(sd.TableDefinitions), dumpTableWorker); err != nil { return } if e := os.Remove(mainCloneSourcePath); e != nil { log.Errorf("Cannot remove %v: %v", mainCloneSourcePath, e) } // Check the replication position after snapshot is done // hasn't changed, to be sure we haven't inserted any data var newReplicationPosition *proto.ReplicationPosition newReplicationPosition, _, err = mysqld.getReplicationPositionForClones(allowHierarchicalReplication) if err != nil { return } if newReplicationPosition.MasterLogGroupId != replicationPosition.MasterLogGroupId { return nil, fmt.Errorf("replicationPosition position changed during snapshot, from %u to %v", replicationPosition.MasterLogGroupId, newReplicationPosition.MasterLogGroupId) } // Write all the manifest files ssmFiles := make([]string, len(keyRanges)) for i, kr := range keyRanges { krDatafiles := make([]SnapshotFile, 0, len(datafiles)) for _, m := range datafiles { krDatafiles = append(krDatafiles, m[kr]...) } ssm, err := NewSplitSnapshotManifest(sourceAddr, mysqld.IpAddr(), masterAddr, dbName, krDatafiles, replicationPosition, myMasterPosition, kr, sd) if err != nil { return nil, err } ssmFiles[i] = path.Join(cloneSourcePaths[kr], partialSnapshotManifestFile) if err = writeJson(ssmFiles[i], ssm); err != nil { return nil, err } } // Call the (optional) hook to send the files somewhere else wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for _, kr := range keyRanges { wg.Add(1) go func(kr key.KeyRange) { defer wg.Done() h := hook.NewSimpleHook("copy_snapshot_to_storage") h.ExtraEnv = make(map[string]string) for k, v := range hookExtraEnv { h.ExtraEnv[k] = v } h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", kr.Start.Hex(), kr.End.Hex()) h.ExtraEnv["SNAPSHOT_PATH"] = cloneSourcePaths[kr] rec.RecordError(h.ExecuteOptional()) }(kr) } wg.Wait() if rec.HasErrors() { return nil, err } // Return all the URLs for the MANIFESTs snapshotURLPaths := make([]string, len(keyRanges)) for i := 0; i < len(keyRanges); i++ { relative, err := filepath.Rel(mysqld.SnapshotDir, ssmFiles[i]) if err != nil { return nil, err } snapshotURLPaths[i] = path.Join(SnapshotURLPath, relative) } return snapshotURLPaths, nil }
// Shutdown will stop the mysqld daemon that is running in the background. // // waitForMysqld: should the function block until mysqld has stopped? // This can actually take a *long* time if the buffer cache needs to be fully // flushed - on the order of 20-30 minutes. // // If a mysqlctld address is provided in a flag, Shutdown will run remotely. func (mysqld *Mysqld) Shutdown(ctx context.Context, waitForMysqld bool) error { log.Infof("Mysqld.Shutdown") // Execute as remote action on mysqlctld if requested. if *socketFile != "" { log.Infof("executing Mysqld.Shutdown() remotely via mysqlctld server: %v", *socketFile) client, err := mysqlctlclient.New("unix", *socketFile) if err != nil { return fmt.Errorf("can't dial mysqlctld: %v", err) } defer client.Close() return client.Shutdown(ctx, waitForMysqld) } // We're shutting down on purpose. We no longer want to be notified when // mysqld terminates. mysqld.mutex.Lock() if mysqld.cancelWaitCmd != nil { close(mysqld.cancelWaitCmd) mysqld.cancelWaitCmd = nil } mysqld.mutex.Unlock() // possibly mysql is already shutdown, check for a few files first _, socketPathErr := os.Stat(mysqld.config.SocketFile) _, pidPathErr := os.Stat(mysqld.config.PidFile) if socketPathErr != nil && pidPathErr != nil { log.Warningf("assuming mysqld already shut down - no socket, no pid file found") return nil } // try the mysqld shutdown hook, if any h := hook.NewSimpleHook("mysqld_shutdown") hr := h.Execute() switch hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, try mysqladmin log.Infof("No mysqld_shutdown hook, running mysqladmin directly") dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name := path.Join(dir, "bin/mysqladmin") arg := []string{ "-u", "vt_dba", "-S", mysqld.config.SocketFile, "shutdown"} env := []string{ os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql"), } _, err = execCmd(name, arg, env, dir) if err != nil { return err } default: // hook failed, we report error return fmt.Errorf("mysqld_shutdown hook failed: %v", hr.String()) } // Wait for mysqld to really stop. Use the sock file as a // proxy for that since we can't call wait() in a process we // didn't start. if waitForMysqld { for { select { case <-ctx.Done(): return errors.New("gave up waiting for mysqld to stop") default: } _, statErr := os.Stat(mysqld.config.SocketFile) if statErr != nil && os.IsNotExist(statErr) { return nil } log.Infof("Mysqld.Shutdown: sleeping for 1s waiting for socket file %v", mysqld.config.SocketFile) time.Sleep(time.Second) } } return nil }
// Start will start the mysql daemon, either by running the 'mysqld_start' // hook, or by running mysqld_safe in the background. // If a mysqlctld address is provided in a flag, Start will run remotely. func (mysqld *Mysqld) Start(ctx context.Context) error { // Execute as remote action on mysqlctld if requested. if *socketFile != "" { log.Infof("executing Mysqld.Start() remotely via mysqlctld server: %v", *socketFile) client, err := mysqlctlclient.New("unix", *socketFile) if err != nil { return fmt.Errorf("can't dial mysqlctld: %v", err) } defer client.Close() return client.Start(ctx) } var name string ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix()) // try the mysqld start hook, if any switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going name = "mysqld_start hook" case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, run mysqld_safe ourselves log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts) dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name = path.Join(dir, "bin/mysqld_safe") arg := []string{ "--defaults-file=" + mysqld.config.path} env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")} cmd := exec.Command(name, arg...) cmd.Dir = dir cmd.Env = env log.Infof("%v %#v", ts, cmd) stderr, err := cmd.StderrPipe() if err != nil { return nil } stdout, err := cmd.StdoutPipe() if err != nil { return nil } go func() { scanner := bufio.NewScanner(stderr) for scanner.Scan() { log.Infof("%v stderr: %v", ts, scanner.Text()) } }() go func() { scanner := bufio.NewScanner(stdout) for scanner.Scan() { log.Infof("%v stdout: %v", ts, scanner.Text()) } }() err = cmd.Start() if err != nil { return nil } mysqld.mutex.Lock() mysqld.cancelWaitCmd = make(chan struct{}) go func(cancel <-chan struct{}) { // Wait regardless of cancel, so we don't generate defunct processes. err := cmd.Wait() log.Infof("%v exit: %v", ts, err) // The process exited. Trigger OnTerm callbacks, unless we were cancelled. select { case <-cancel: default: mysqld.mutex.Lock() for _, callback := range mysqld.onTermFuncs { go callback() } mysqld.mutex.Unlock() } }(mysqld.cancelWaitCmd) mysqld.mutex.Unlock() default: // hook failed, we report error return fmt.Errorf("mysqld_start hook failed: %v", hr.String()) } // give it some time to succeed - usually by the time the socket emerges // we are in good shape for { select { case <-ctx.Done(): return errors.New(name + ": deadline exceeded waiting for " + mysqld.config.SocketFile) default: } _, statErr := os.Stat(mysqld.config.SocketFile) if statErr == nil { // Make sure the socket file isn't stale. conn, connErr := mysqld.dbaPool.Get(0) if connErr == nil { conn.Recycle() return nil } } else if !os.IsNotExist(statErr) { return statErr } log.Infof("%v: sleeping for 1s waiting for socket file %v", ts, mysqld.config.SocketFile) time.Sleep(time.Second) } }
func (ta *TabletActor) multiRestore(actionNode *actionnode.ActionNode) (err error) { args := actionNode.Args.(*actionnode.MultiRestoreArgs) // read our current tablet, verify its state // we only support restoring to the master or active replicas tablet, err := ta.ts.GetTablet(ta.tabletAlias) if err != nil { return err } if tablet.Type != topo.TYPE_MASTER && !topo.IsSlaveType(tablet.Type) { return fmt.Errorf("expected master, or slave type, not %v: %v", tablet.Type, ta.tabletAlias) } // get source tablets addresses sourceAddrs := make([]*url.URL, len(args.SrcTabletAliases)) keyRanges := make([]key.KeyRange, len(args.SrcTabletAliases)) fromStoragePaths := make([]string, len(args.SrcTabletAliases)) for i, alias := range args.SrcTabletAliases { t, e := ta.ts.GetTablet(alias) if e != nil { return e } sourceAddrs[i] = &url.URL{ Host: t.Addr(), Path: "/" + t.DbName(), } keyRanges[i], e = key.KeyRangesOverlap(tablet.KeyRange, t.KeyRange) if e != nil { return e } fromStoragePaths[i] = path.Join(ta.mysqld.SnapshotDir, "from-storage", fmt.Sprintf("from-%v-%v", keyRanges[i].Start.Hex(), keyRanges[i].End.Hex())) } // change type to restore, no change to replication graph originalType := tablet.Type tablet.Type = topo.TYPE_RESTORE err = topo.UpdateTablet(ta.ts, tablet) if err != nil { return err } // first try to get the data from a remote storage wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} for i, alias := range args.SrcTabletAliases { wg.Add(1) go func(i int, alias topo.TabletAlias) { defer wg.Done() h := hook.NewSimpleHook("copy_snapshot_from_storage") h.ExtraEnv = make(map[string]string) for k, v := range ta.hookExtraEnv() { h.ExtraEnv[k] = v } h.ExtraEnv["KEYRANGE"] = fmt.Sprintf("%v-%v", keyRanges[i].Start.Hex(), keyRanges[i].End.Hex()) h.ExtraEnv["SNAPSHOT_PATH"] = fromStoragePaths[i] h.ExtraEnv["SOURCE_TABLET_ALIAS"] = alias.String() hr := h.Execute() if hr.ExitStatus != hook.HOOK_SUCCESS { rec.RecordError(fmt.Errorf("%v hook failed(%v): %v", h.Name, hr.ExitStatus, hr.Stderr)) } }(i, alias) } wg.Wait() // run the action, scrap if it fails if rec.HasErrors() { log.Infof("Got errors trying to get snapshots from storage, trying to get them from original tablets: %v", rec.Error()) err = ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, sourceAddrs, nil, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy) } else { log.Infof("Got snapshots from storage, reading them from disk directly") err = ta.mysqld.MultiRestore(tablet.DbName(), keyRanges, nil, fromStoragePaths, args.Concurrency, args.FetchConcurrency, args.InsertTableConcurrency, args.FetchRetryCount, args.Strategy) } if err != nil { if e := topotools.Scrap(ta.ts, ta.tabletAlias, false); e != nil { log.Errorf("Failed to Scrap after failed RestoreFromMultiSnapshot: %v", e) } return err } // restore type back tablet.Type = originalType return topo.UpdateTablet(ta.ts, tablet) }
// Start will start the mysql daemon, either by running the 'mysqld_start' // hook, or by running mysqld_safe in the background. func (mysqld *Mysqld) Start(mysqlWaitTime time.Duration) error { var name string ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix()) // try the mysqld start hook, if any switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going name = "mysqld_start hook" case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, run mysqld_safe ourselves log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts) dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name = path.Join(dir, "bin/mysqld_safe") arg := []string{ "--defaults-file=" + mysqld.config.path} env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")} cmd := exec.Command(name, arg...) cmd.Dir = dir cmd.Env = env log.Infof("%v mysqlWaitTime:%v %#v", ts, mysqlWaitTime, cmd) stderr, err := cmd.StderrPipe() if err != nil { return nil } stdout, err := cmd.StdoutPipe() if err != nil { return nil } go func() { scanner := bufio.NewScanner(stderr) for scanner.Scan() { log.Infof("%v stderr: %v", ts, scanner.Text()) } }() go func() { scanner := bufio.NewScanner(stdout) for scanner.Scan() { log.Infof("%v stdout: %v", ts, scanner.Text()) } }() err = cmd.Start() if err != nil { return nil } mysqld.done = make(chan struct{}) go func(done chan<- struct{}) { // wait so we don't get a bunch of defunct processes err := cmd.Wait() log.Infof("%v exit: %v", ts, err) close(done) }(mysqld.done) default: // hook failed, we report error return fmt.Errorf("mysqld_start hook failed: %v", hr.String()) } // give it some time to succeed - usually by the time the socket emerges // we are in good shape for i := mysqlWaitTime; i >= 0; i -= time.Second { _, statErr := os.Stat(mysqld.config.SocketFile) if statErr == nil { // Make sure the socket file isn't stale. conn, connErr := mysqld.dbaPool.Get(0) if connErr == nil { conn.Recycle() return nil } } else if !os.IsNotExist(statErr) { return statErr } log.Infof("%v: sleeping for 1s waiting for socket file %v", ts, mysqld.config.SocketFile) time.Sleep(time.Second) } return errors.New(name + ": deadline exceeded waiting for " + mysqld.config.SocketFile) }
// Start will start the mysql daemon, either by running the 'mysqld_start' // hook, or by running mysqld_safe in the background. // If a mysqlctld address is provided in a flag, Start will run remotely. func (mysqld *Mysqld) Start(ctx context.Context) error { // Execute as remote action on mysqlctld if requested. if *socketFile != "" { log.Infof("executing Mysqld.Start() remotely via mysqlctld server: %v", *socketFile) client, err := mysqlctlclient.New("unix", *socketFile) if err != nil { return fmt.Errorf("can't dial mysqlctld: %v", err) } defer client.Close() return client.Start(ctx) } var name string ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix()) // try the mysqld start hook, if any switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going name = "mysqld_start hook" case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, run mysqld_safe ourselves log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts) dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name, err = binaryPath(dir, "mysqld_safe") if err != nil { return err } arg := []string{ "--defaults-file=" + mysqld.config.path} env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")} cmd := exec.Command(name, arg...) cmd.Dir = dir cmd.Env = env log.Infof("%v %#v", ts, cmd) stderr, err := cmd.StderrPipe() if err != nil { return nil } stdout, err := cmd.StdoutPipe() if err != nil { return nil } go func() { scanner := bufio.NewScanner(stderr) for scanner.Scan() { log.Infof("%v stderr: %v", ts, scanner.Text()) } }() go func() { scanner := bufio.NewScanner(stdout) for scanner.Scan() { log.Infof("%v stdout: %v", ts, scanner.Text()) } }() err = cmd.Start() if err != nil { return nil } mysqld.mutex.Lock() mysqld.cancelWaitCmd = make(chan struct{}) go func(cancel <-chan struct{}) { // Wait regardless of cancel, so we don't generate defunct processes. err := cmd.Wait() log.Infof("%v exit: %v", ts, err) // The process exited. Trigger OnTerm callbacks, unless we were cancelled. select { case <-cancel: default: mysqld.mutex.Lock() for _, callback := range mysqld.onTermFuncs { go callback() } mysqld.mutex.Unlock() } }(mysqld.cancelWaitCmd) mysqld.mutex.Unlock() default: // hook failed, we report error return fmt.Errorf("mysqld_start hook failed: %v", hr.String()) } return mysqld.Wait(ctx) }
func Init(mt *Mysqld, mysqlWaitTime time.Duration) error { log.Infof("mysqlctl.Init") err := mt.createDirs() if err != nil { log.Errorf("%s", err.Error()) return err } root, err := vtenv.VtRoot() if err != nil { log.Errorf("%s", err.Error()) return err } hr := hook.NewSimpleHook("make_mycnf").Execute() configData := "" if hr.ExitStatus == hook.HOOK_DOES_NOT_EXIST { log.Infof("make_mycnf hook doesn't exist") cnfTemplatePaths := []string{ path.Join(root, "config/mycnf/default.cnf"), path.Join(root, "config/mycnf/master.cnf"), path.Join(root, "config/mycnf/replica.cnf"), } if extraCnf := os.Getenv("EXTRA_MY_CNF"); extraCnf != "" { parts := strings.Split(extraCnf, ":") cnfTemplatePaths = append(cnfTemplatePaths, parts...) } configData, err = MakeMycnf(mt.config, cnfTemplatePaths) } else if hr.ExitStatus == hook.HOOK_SUCCESS { configData, err = fillMycnfTemplate(mt.config, hr.Stdout) } else { err = fmt.Errorf("make_mycnf hook failed(%v): %v", hr.ExitStatus, hr.Stderr) } if err == nil { err = ioutil.WriteFile(mt.config.path, []byte(configData), 0664) } if err != nil { log.Errorf("failed creating %v: %v", mt.config.path, err) return err } dbTbzPath := path.Join(root, "data/bootstrap/mysql-db-dir.tbz") log.Infof("decompress bootstrap db %v", dbTbzPath) args := []string{"-xj", "-C", mt.config.DataDir, "-f", dbTbzPath} _, tarErr := execCmd("tar", args, []string{}, "") if tarErr != nil { log.Errorf("failed unpacking %v: %v", dbTbzPath, tarErr) return tarErr } if err = Start(mt, mysqlWaitTime); err != nil { log.Errorf("failed starting, check %v", mt.config.ErrorLogPath) return err } schemaPath := path.Join(root, "data/bootstrap/_vt_schema.sql") schema, err := ioutil.ReadFile(schemaPath) if err != nil { return err } sqlCmds := make([]string, 0, 10) log.Infof("initial schema: %v", string(schema)) for _, cmd := range strings.Split(string(schema), ";") { cmd = strings.TrimSpace(cmd) if cmd == "" { continue } sqlCmds = append(sqlCmds, cmd) } return mt.executeSuperQueryList(sqlCmds) }