func (mysqld *Mysqld) initConfig(root string) error { var err error var configData string switch hr := hook.NewSimpleHook("make_mycnf").Execute(); hr.ExitStatus { case hook.HOOK_DOES_NOT_EXIST: log.Infof("make_mycnf hook doesn't exist, reading default template files") cnfTemplatePaths := []string{ path.Join(root, "config/mycnf/default.cnf"), path.Join(root, "config/mycnf/master.cnf"), path.Join(root, "config/mycnf/replica.cnf"), } if extraCnf := os.Getenv("EXTRA_MY_CNF"); extraCnf != "" { parts := strings.Split(extraCnf, ":") cnfTemplatePaths = append(cnfTemplatePaths, parts...) } configData, err = mysqld.config.makeMycnf(cnfTemplatePaths) case hook.HOOK_SUCCESS: configData, err = mysqld.config.fillMycnfTemplate(hr.Stdout) default: return fmt.Errorf("make_mycnf hook failed(%v): %v", hr.ExitStatus, hr.Stderr) } if err != nil { return err } return ioutil.WriteFile(mysqld.config.path, []byte(configData), 0664) }
func (mysqld *Mysqld) ValidateCloneTarget(hookExtraEnv map[string]string) error { // run a hook to check local things h := hook.NewSimpleHook("preflight_restore") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } qr, err := mysqld.fetchSuperQuery("SHOW DATABASES") if err != nil { return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err) } for _, row := range qr.Rows { if strings.HasPrefix(row[0].String(), "vt_") { dbName := row[0].String() tableQr, err := mysqld.fetchSuperQuery("SHOW TABLES FROM " + dbName) if err != nil { return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, %v", err) } else if len(tableQr.Rows) == 0 { // no tables == empty db, all is well continue } return fmt.Errorf("mysqlctl: ValidateCloneTarget failed, found active db %v", dbName) } } return nil }
func (wr *Wrangler) checkMasterElect(ti *topo.TabletInfo) error { // Check the master-elect is fit for duty - call out for hardware checks. // if the server was already serving live traffic, it's probably good if ti.IsInServingGraph() { return nil } return wr.ExecuteOptionalTabletInfoHook(ti, hook.NewSimpleHook("preflight_serving_type")) }
func (mysqld *Mysqld) StopSlave(hookExtraEnv map[string]string) error { h := hook.NewSimpleHook("preflight_stop_slave") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } return mysqld.ExecuteSuperQuery("STOP SLAVE") }
func (mysqld *Mysqld) StartSlave(hookExtraEnv map[string]string) error { if err := mysqld.ExecuteSuperQuery("START SLAVE"); err != nil { return err } h := hook.NewSimpleHook("postflight_start_slave") h.ExtraEnv = hookExtraEnv return h.ExecuteOptional() }
// This piece runs on the presumably empty machine acting as the target in the // create replica action. // // validate target (self) // shutdown_mysql() // create temp data directory /vt/target/vt_<keyspace> // copy compressed data files via HTTP // verify hash of compressed files // uncompress into /vt/vt_<target-uid>/data/vt_<keyspace> // start_mysql() // clean up compressed files func (mysqld *Mysqld) RestoreFromSnapshot(logger logutil.Logger, snapshotManifest *SnapshotManifest, fetchConcurrency, fetchRetryCount int, dontWaitForSlaveStart bool, hookExtraEnv map[string]string) error { if snapshotManifest == nil { return errors.New("RestoreFromSnapshot: nil snapshotManifest") } logger.Infof("ValidateCloneTarget") if err := mysqld.ValidateCloneTarget(hookExtraEnv); err != nil { return err } logger.Infof("Shutdown mysqld") if err := mysqld.Shutdown(true, MysqlWaitTime); err != nil { return err } logger.Infof("Fetch snapshot") if err := mysqld.fetchSnapshot(snapshotManifest, fetchConcurrency, fetchRetryCount); err != nil { return err } logger.Infof("Restart mysqld") if err := mysqld.Start(MysqlWaitTime); err != nil { return err } cmdList, err := mysqld.StartReplicationCommands(snapshotManifest.ReplicationStatus) if err != nil { return err } if err := mysqld.ExecuteSuperQueryList(cmdList); err != nil { return err } if !dontWaitForSlaveStart { if err := mysqld.WaitForSlaveStart(SlaveStartDeadline); err != nil { return err } } h := hook.NewSimpleHook("postflight_restore") h.ExtraEnv = hookExtraEnv if err := h.ExecuteOptional(); err != nil { return err } return nil }
// Scrap will update the tablet type to 'Scrap', and remove it from // the serving graph. // // 'force' means we are not on the tablet being scrapped, so it is // probably dead. So if 'force' is true, we will also remove pending // remote actions. And if 'force' is false, we also run an optional // hook. func Scrap(ts topo.Server, tabletAlias topo.TabletAlias, force bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } // If you are already scrap, skip updating replication data. It won't // be there anyway. wasAssigned := tablet.IsAssigned() tablet.Type = topo.TYPE_SCRAP tablet.Parent = topo.TabletAlias{} // Update the tablet first, since that is canonical. err = topo.UpdateTablet(context.TODO(), ts, tablet) if err != nil { return err } if wasAssigned { err = topo.DeleteTabletReplicationData(ts, tablet.Tablet) if err != nil { if err == topo.ErrNoNode { log.V(6).Infof("no ShardReplication object for cell %v", tablet.Alias.Cell) err = nil } if err != nil { log.Warningf("remove replication data for %v failed: %v", tablet.Alias, err) } } } // run a hook for final cleanup, only in non-force mode. // (force mode executes on the vtctl side, not on the vttablet side) if !force { hk := hook.NewSimpleHook("postflight_scrap") ConfigureTabletHook(hk, tablet.Alias) if hookErr := hk.ExecuteOptional(); hookErr != nil { // we don't want to return an error, the server // is already in bad shape probably. log.Warningf("Scrap: postflight_scrap failed: %v", hookErr) } } return nil }
// change a tablet type to RESTORE and set all the other arguments. // from now on, we can go to: // - back to IDLE if we don't use the tablet at all (after for instance // a successful ReserveForRestore but a failed Snapshot) // - to SCRAP if something in the process on the target host fails // - to SPARE if the clone works func (agent *ActionAgent) changeTypeToRestore(ctx context.Context, tablet, sourceTablet *topo.TabletInfo, parentAlias topo.TabletAlias, keyRange key.KeyRange) error { // run the optional preflight_assigned hook hk := hook.NewSimpleHook("preflight_assigned") topotools.ConfigureTabletHook(hk, agent.TabletAlias) if err := hk.ExecuteOptional(); err != nil { return err } // change the type tablet.Parent = parentAlias tablet.Keyspace = sourceTablet.Keyspace tablet.Shard = sourceTablet.Shard tablet.Type = topo.TYPE_RESTORE tablet.KeyRange = keyRange tablet.DbNameOverride = sourceTablet.DbNameOverride if err := topo.UpdateTablet(ctx, agent.TopoServer, tablet); err != nil { return err } // and create the replication graph items return topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet.Tablet) }
// Start will start the mysql daemon, either by running the 'mysqld_start' // hook, or by running mysqld_safe in the background. // If a mysqlctld address is provided in a flag, Start will run remotely. func (mysqld *Mysqld) Start(mysqlWaitTime time.Duration) error { // Execute as remote action on mysqlctld if requested. if *socketFile != "" { log.Infof("executing Mysqld.Start() remotely via mysqlctld server: %v", *socketFile) client, err := mysqlctlclient.New("unix", *socketFile, mysqlWaitTime) if err != nil { return fmt.Errorf("can't dial mysqlctld: %v", err) } defer client.Close() return client.Start(mysqlWaitTime) } var name string ts := fmt.Sprintf("Mysqld.Start(%v)", time.Now().Unix()) // try the mysqld start hook, if any switch hr := hook.NewSimpleHook("mysqld_start").Execute(); hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going name = "mysqld_start hook" case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, run mysqld_safe ourselves log.Infof("%v: No mysqld_start hook, running mysqld_safe directly", ts) dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name = path.Join(dir, "bin/mysqld_safe") arg := []string{ "--defaults-file=" + mysqld.config.path} env := []string{os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql")} cmd := exec.Command(name, arg...) cmd.Dir = dir cmd.Env = env log.Infof("%v mysqlWaitTime:%v %#v", ts, mysqlWaitTime, cmd) stderr, err := cmd.StderrPipe() if err != nil { return nil } stdout, err := cmd.StdoutPipe() if err != nil { return nil } go func() { scanner := bufio.NewScanner(stderr) for scanner.Scan() { log.Infof("%v stderr: %v", ts, scanner.Text()) } }() go func() { scanner := bufio.NewScanner(stdout) for scanner.Scan() { log.Infof("%v stdout: %v", ts, scanner.Text()) } }() err = cmd.Start() if err != nil { return nil } mysqld.mutex.Lock() mysqld.cancelWaitCmd = make(chan struct{}) go func(cancel <-chan struct{}) { // Wait regardless of cancel, so we don't generate defunct processes. err := cmd.Wait() log.Infof("%v exit: %v", ts, err) // The process exited. Trigger OnTerm callbacks, unless we were cancelled. select { case <-cancel: default: mysqld.mutex.Lock() for _, callback := range mysqld.onTermFuncs { go callback() } mysqld.mutex.Unlock() } }(mysqld.cancelWaitCmd) mysqld.mutex.Unlock() default: // hook failed, we report error return fmt.Errorf("mysqld_start hook failed: %v", hr.String()) } // give it some time to succeed - usually by the time the socket emerges // we are in good shape for i := mysqlWaitTime; i >= 0; i -= time.Second { _, statErr := os.Stat(mysqld.config.SocketFile) if statErr == nil { // Make sure the socket file isn't stale. conn, connErr := mysqld.dbaPool.Get(0) if connErr == nil { conn.Recycle() return nil } } else if !os.IsNotExist(statErr) { return statErr } log.Infof("%v: sleeping for 1s waiting for socket file %v", ts, mysqld.config.SocketFile) time.Sleep(time.Second) } return errors.New(name + ": deadline exceeded waiting for " + mysqld.config.SocketFile) }
// Shutdown will stop the mysqld daemon that is running in the background. // // waitForMysqld: should the function block until mysqld has stopped? // This can actually take a *long* time if the buffer cache needs to be fully // flushed - on the order of 20-30 minutes. // // If a mysqlctld address is provided in a flag, Shutdown will run remotely. func (mysqld *Mysqld) Shutdown(waitForMysqld bool, mysqlWaitTime time.Duration) error { log.Infof("Mysqld.Shutdown") // Execute as remote action on mysqlctld if requested. if *socketFile != "" { log.Infof("executing Mysqld.Shutdown() remotely via mysqlctld server: %v", *socketFile) client, err := mysqlctlclient.New("unix", *socketFile, mysqlWaitTime) if err != nil { return fmt.Errorf("can't dial mysqlctld: %v", err) } defer client.Close() return client.Shutdown(waitForMysqld, mysqlWaitTime) } // We're shutting down on purpose. We no longer want to be notified when // mysqld terminates. mysqld.mutex.Lock() if mysqld.cancelWaitCmd != nil { close(mysqld.cancelWaitCmd) mysqld.cancelWaitCmd = nil } mysqld.mutex.Unlock() // possibly mysql is already shutdown, check for a few files first _, socketPathErr := os.Stat(mysqld.config.SocketFile) _, pidPathErr := os.Stat(mysqld.config.PidFile) if socketPathErr != nil && pidPathErr != nil { log.Warningf("assuming mysqld already shut down - no socket, no pid file found") return nil } // try the mysqld shutdown hook, if any h := hook.NewSimpleHook("mysqld_shutdown") hr := h.Execute() switch hr.ExitStatus { case hook.HOOK_SUCCESS: // hook exists and worked, we can keep going case hook.HOOK_DOES_NOT_EXIST: // hook doesn't exist, try mysqladmin log.Infof("No mysqld_shutdown hook, running mysqladmin directly") dir, err := vtenv.VtMysqlRoot() if err != nil { return err } name := path.Join(dir, "bin/mysqladmin") arg := []string{ "-u", "vt_dba", "-S", mysqld.config.SocketFile, "shutdown"} env := []string{ os.ExpandEnv("LD_LIBRARY_PATH=$VT_MYSQL_ROOT/lib/mysql"), } _, err = execCmd(name, arg, env, dir) if err != nil { return err } default: // hook failed, we report error return fmt.Errorf("mysqld_shutdown hook failed: %v", hr.String()) } // wait for mysqld to really stop. use the sock file as a proxy for that since // we can't call wait() in a process we didn't start. if waitForMysqld { for i := mysqlWaitTime; i >= 0; i -= time.Second { _, statErr := os.Stat(mysqld.config.SocketFile) if statErr != nil && os.IsNotExist(statErr) { return nil } log.Infof("Mysqld.Shutdown: sleeping for 1s waiting for socket file %v", mysqld.config.SocketFile) time.Sleep(time.Second) } return errors.New("gave up waiting for mysqld to stop") } return nil }
// ChangeType changes the type of the tablet and possibly also updates // the health informaton for it. Make this external, since these // transitions need to be forced from time to time. // // - if health is nil, we don't touch the Tablet's Health record. // - if health is an empty map, we clear the Tablet's Health record. // - if health has values, we overwrite the Tablet's Health record. func ChangeType(ts topo.Server, tabletAlias topo.TabletAlias, newType topo.TabletType, health map[string]string, runHooks bool) error { tablet, err := ts.GetTablet(tabletAlias) if err != nil { return err } if !topo.IsTrivialTypeChange(tablet.Type, newType) || !topo.IsValidTypeChange(tablet.Type, newType) { return fmt.Errorf("cannot change tablet type %v -> %v %v", tablet.Type, newType, tabletAlias) } if runHooks { // Only run the preflight_serving_type hook when // transitioning from non-serving to serving. if !topo.IsInServingGraph(tablet.Type) && topo.IsInServingGraph(newType) { if err := hook.NewSimpleHook("preflight_serving_type").ExecuteOptional(); err != nil { return err } } } tablet.Type = newType if newType == topo.TYPE_IDLE { if tablet.Parent.IsZero() { si, err := ts.GetShard(tablet.Keyspace, tablet.Shard) if err != nil { return err } rec := concurrency.AllErrorRecorder{} wg := sync.WaitGroup{} for _, cell := range si.Cells { wg.Add(1) go func(cell string) { defer wg.Done() sri, err := ts.GetShardReplication(cell, tablet.Keyspace, tablet.Shard) if err != nil { log.Warningf("Cannot check cell %v for extra replication paths, assuming it's good", cell) return } for _, rl := range sri.ReplicationLinks { if rl.Parent == tabletAlias { rec.RecordError(fmt.Errorf("Still have a ReplicationLink in cell %v", cell)) } } }(cell) } wg.Wait() if rec.HasErrors() { return rec.Error() } } tablet.Parent = topo.TabletAlias{} tablet.Keyspace = "" tablet.Shard = "" tablet.KeyRange = key.KeyRange{} tablet.Health = health } if health != nil { if len(health) == 0 { tablet.Health = nil } else { tablet.Health = health } } return topo.UpdateTablet(context.TODO(), ts, tablet) }