// ExecuteVtworkerCommand is part of the vtworkerdatapb.VtworkerServer interface
func (s *VtworkerServer) ExecuteVtworkerCommand(args *vtworkerdatapb.ExecuteVtworkerCommandRequest, stream vtworkerservicepb.Vtworker_ExecuteVtworkerCommandServer) (err error) {
	// Please note that this panic handler catches only panics occurring in the code below.
	// The actual execution of the vtworker command takes place in a new goroutine
	// (started in Instance.setAndStartWorker()) which has its own panic handler.
	defer servenv.HandlePanic("vtworker", &err)

	// Stream back everything the Wrangler is logging.
	logstream := logutil.NewCallbackLogger(func(e *logutilpb.Event) {
		// If the client disconnects, we will just fail
		// to send the log events, but won't interrupt
		// the command.
		stream.Send(&vtworkerdatapb.ExecuteVtworkerCommandResponse{
			Event: e,
		})
	})
	// Let the Wrangler also log everything to the console (and thereby
	// effectively to a logfile) to make sure that any information or errors
	// are preserved in the logs in case the RPC or vtworker crashes.
	logger := logutil.NewTeeLogger(logstream, logutil.NewConsoleLogger())

	// create the wrangler
	wr := s.wi.CreateWrangler(logger)

	// execute the command
	worker, done, err := s.wi.RunCommand(args.Args, wr, false /*runFromCli*/)
	if err == nil && worker != nil && done != nil {
		err = s.wi.WaitForCommand(worker, done)
	}

	return err
}
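// The tee pattern used above (every log event goes both to the RPC stream and
// to the console) can be illustrated with a minimal, standalone sketch using
// only the Go standard library. The names below (event, teeLogger) are
// illustrative assumptions and are not part of the Vitess logutil API.
package main

import (
	"fmt"
	"log"
	"os"
)

// event is a stand-in for a structured log event (hypothetical,
// not the Vitess logutilpb.Event type).
type event struct{ msg string }

// teeLogger fans one event out to several sinks, mirroring the idea
// behind logutil.NewTeeLogger(callbackLogger, consoleLogger).
type teeLogger struct {
	sinks []func(event)
}

func (t *teeLogger) log(e event) {
	for _, sink := range t.sinks {
		sink(e)
	}
}

func main() {
	console := log.New(os.Stderr, "console: ", log.LstdFlags)

	tee := &teeLogger{
		sinks: []func(event){
			// "stream" sink: in the real server this would call stream.Send;
			// here stdout stands in for the RPC client.
			func(e event) { fmt.Println("to client:", e.msg) },
			// console sink, so the event also ends up in the local logs.
			func(e event) { console.Print(e.msg) },
		},
	}

	tee.log(event{msg: "command started"})
}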
func runCommand(args []string) {
	wrk := commandWorker(wr, args)
	done, err := setAndStartWorker(wrk, logutil.NewConsoleLogger())
	if err != nil {
		log.Fatalf("Cannot set worker: %v", err)
	}

	// a goroutine displays the status at every commandDisplayInterval tick
	go func() {
		timer := time.Tick(*commandDisplayInterval)
		for {
			select {
			case <-done:
				log.Infof("Command is done:")
				log.Info(wrk.StatusAsText())
				if wrk.Error() != nil {
					os.Exit(1)
				}
				os.Exit(0)
			case <-timer:
				log.Info(wrk.StatusAsText())
			}
		}
	}()
}
func (ar *ActionRepository) ApplyTabletAction(actionName string, tabletAlias topo.TabletAlias, r *http.Request) *ActionResult {
	result := &ActionResult{Name: actionName, Parameters: tabletAlias.String()}

	action, ok := ar.tabletActions[actionName]
	if !ok {
		result.error("Unknown tablet action")
		return result
	}

	// check the role
	if action.role != "" {
		if err := acl.CheckAccessHTTP(r, action.role); err != nil {
			result.error("Access denied")
			return result
		}
	}

	// run the action
	wr := wrangler.New(logutil.NewConsoleLogger(), ar.ts, wrangler.DefaultActionTimeout, actionnode.DefaultLockTimeout)
	output, err := action.method(wr, tabletAlias, r)
	if err != nil {
		result.error(err.Error())
		return result
	}
	result.Output = output
	return result
}
func TestSchemaManagerExecutorFail(t *testing.T) {
	sql := "create table test_table (pk int)"
	controller := newFakeController([]string{sql}, false, false, false)
	fakeTmc := newFakeTabletManagerClient()
	fakeTmc.AddSchemaChange(sql, &tabletmanagerdatapb.SchemaChangeResult{
		BeforeSchema: &tabletmanagerdatapb.SchemaDefinition{},
		AfterSchema: &tabletmanagerdatapb.SchemaDefinition{
			DatabaseSchema: "CREATE DATABASE `{{.DatabaseName}}` /*!40100 DEFAULT CHARACTER SET utf8 */",
			TableDefinitions: []*tabletmanagerdatapb.TableDefinition{
				{
					Name:   "test_table",
					Schema: sql,
					Type:   tmutils.TableBaseTable,
				},
			},
		},
	})

	fakeTmc.AddSchemaDefinition("vt_test_keyspace", &tabletmanagerdatapb.SchemaDefinition{})
	fakeTmc.EnableExecuteFetchAsDbaError = true
	wr := wrangler.New(logutil.NewConsoleLogger(), newFakeTopo(), fakeTmc)
	executor := NewTabletExecutor(wr, testWaitSlaveTimeout)

	ctx := context.Background()
	err := Run(ctx, controller, executor)
	if err == nil {
		t.Fatalf("schema change should fail")
	}
}
// TestEmergencyReparentShardMasterElectNotBest tries to emergency reparent
// to a host that is not the latest in replication position.
func TestEmergencyReparentShardMasterElectNotBest(t *testing.T) {
	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)

	// Create an old master, a new master candidate, and a more advanced slave
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA)
	moreAdvancedSlave := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA)

	// new master
	newMaster.FakeMysqlDaemon.Replicating = true
	newMaster.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   2,
			Server:   123,
			Sequence: 456,
		},
	}
	newMaster.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"STOP SLAVE",
	}
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// old master, will be scrapped
	oldMaster.StartActionLoop(t, wr)
	defer oldMaster.StopActionLoop(t)

	// more advanced slave
	moreAdvancedSlave.FakeMysqlDaemon.Replicating = true
	moreAdvancedSlave.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   2,
			Server:   123,
			Sequence: 457,
		},
	}
	moreAdvancedSlave.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"STOP SLAVE",
	}
	moreAdvancedSlave.StartActionLoop(t, wr)
	defer moreAdvancedSlave.StopActionLoop(t)

	// run EmergencyReparentShard
	if err := wr.EmergencyReparentShard(ctx, newMaster.Tablet.Keyspace, newMaster.Tablet.Shard, newMaster.Tablet.Alias, 10*time.Second); err == nil || !strings.Contains(err.Error(), "is more advanced than master elect tablet") {
		t.Fatalf("EmergencyReparentShard returned the wrong error: %v", err)
	}

	// check what was run
	if err := newMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("newMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}
	if err := oldMaster.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("oldMaster.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}
	if err := moreAdvancedSlave.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("moreAdvancedSlave.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}
}
// ApplyShardAction applies the provided action to the shard.
func (ar *ActionRepository) ApplyShardAction(ctx context.Context, actionName, keyspace, shard string, r *http.Request) *ActionResult {
	// if the shard name contains a '-', we assume it's the
	// name for a range-based shard, so we lowercase it.
	if strings.Contains(shard, "-") {
		shard = strings.ToLower(shard)
	}
	result := &ActionResult{Name: actionName, Parameters: keyspace + "/" + shard}

	action, ok := ar.shardActions[actionName]
	if !ok {
		result.error("Unknown shard action")
		return result
	}

	ctx, cancel := context.WithTimeout(ctx, *actionTimeout)
	wr := wrangler.New(logutil.NewConsoleLogger(), ar.ts, tmclient.NewTabletManagerClient())
	output, err := action(ctx, wr, keyspace, shard, r)
	cancel()
	if err != nil {
		result.error(err.Error())
		return result
	}
	result.Output = output
	return result
}
// RestoreFromBackup deletes all local data and restores anew from the latest backup.
func (agent *ActionAgent) RestoreFromBackup(ctx context.Context, logger logutil.Logger) error {
	if err := agent.lock(ctx); err != nil {
		return err
	}
	defer agent.unlock()

	tablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias)
	if err != nil {
		return err
	}
	if tablet.Type == topodatapb.TabletType_MASTER {
		return fmt.Errorf("type MASTER cannot restore from backup, if you really need to do this, restart vttablet in replica mode")
	}

	// create the loggers: tee to console and source
	l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)

	// now we can run restore
	err = agent.restoreDataLocked(ctx, l, true /* deleteBeforeRestore */)

	// re-run health check to be sure to capture any replication delay
	agent.runHealthCheckLocked()

	return err
}
// ApplyTabletAction applies the provided action to the tablet.
func (ar *ActionRepository) ApplyTabletAction(ctx context.Context, actionName string, tabletAlias *topodatapb.TabletAlias, r *http.Request) *ActionResult {
	result := &ActionResult{
		Name:       actionName,
		Parameters: topoproto.TabletAliasString(tabletAlias),
	}

	action, ok := ar.tabletActions[actionName]
	if !ok {
		result.error("Unknown tablet action")
		return result
	}

	// check the role
	if action.role != "" {
		if err := acl.CheckAccessHTTP(r, action.role); err != nil {
			result.error("Access denied")
			return result
		}
	}

	// run the action
	ctx, cancel := context.WithTimeout(ctx, *actionTimeout)
	wr := wrangler.New(logutil.NewConsoleLogger(), ar.ts, tmclient.NewTabletManagerClient())
	output, err := action.method(ctx, wr, tabletAlias, r)
	cancel()
	if err != nil {
		result.error(err.Error())
		return result
	}
	result.Output = output
	return result
}
func TestTabletExternallyReparentedFailedOldMaster(t *testing.T) {
	tabletmanager.SetReparentFlags(time.Minute /* finalizeTimeout */)

	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)

	// Create an old master, a new master, and a good slave.
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA)
	goodSlave := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA)

	// Reparent to a replica, and pretend the old master is not responding.

	// On the elected master, we will respond to
	// TabletActionSlaveWasPromoted
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only get a
	// TabletActionSlaveWasRestarted call, let's just not
	// respond to it at all

	// On the good slave, we will respond to
	// TabletActionSlaveWasRestarted.
	goodSlave.StartActionLoop(t, wr)
	defer goodSlave.StopActionLoop(t)

	// The reparent should work as expected here
	t.Logf("TabletExternallyReparented(new master) expecting success")
	tmc := tmclient.NewTabletManagerClient()
	ti, err := ts.GetTablet(ctx, newMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	waitID := makeWaitID()
	if err := tmc.TabletExternallyReparented(context.Background(), ti, waitID); err != nil {
		t.Fatalf("TabletExternallyReparented(replica) failed: %v", err)
	}
	waitForExternalReparent(t, waitID)

	// Now double-check the serving graph is good.
	// Should only have one good replica left.
	addrs, _, err := ts.GetEndPoints(ctx, "cell1", "test_keyspace", "0", topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints failed at the end: %v", err)
	}
	if len(addrs.Entries) != 1 {
		t.Fatalf("GetEndPoints has too many entries: %v", addrs)
	}

	// check the old master was converted to spare
	tablet, err := ts.GetTablet(ctx, oldMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet(%v) failed: %v", oldMaster.Tablet.Alias, err)
	}
	if tablet.Type != topo.TYPE_SPARE {
		t.Fatalf("old master should be spare but is: %v", tablet.Type)
	}
}
func main() {
	flag.Parse()
	args := flag.Args()

	installSignalHandlers()

	servenv.Init()
	defer servenv.Close()

	ts := topo.GetServer()
	defer topo.CloseServers()

	// the logger will be replaced when we start a job
	wr = wrangler.New(logutil.NewConsoleLogger(), ts, 30*time.Second, 30*time.Second)
	if len(args) == 0 {
		// interactive mode, initialize the web UI to choose a command
		initInteractiveMode()
	} else {
		// single command mode, just run it
		runCommand(args)
	}
	initStatusHandling()

	servenv.RunDefault()
}
func main() {
	flag.Parse()

	ctx, cancel := context.WithTimeout(context.Background(), *actionTimeout)
	defer cancel()

	sigChan := make(chan os.Signal, 1)
	signal.Notify(sigChan, syscall.SIGTERM, syscall.SIGINT)
	go func() {
		s := <-sigChan
		log.Errorf("Trying to cancel current command after receiving signal: %v", s)
		cancel()
	}()

	logger := logutil.NewConsoleLogger()

	err := vtworkerclient.RunCommandAndWait(
		ctx, *server, flag.Args(),
		func(e *logutilpb.Event) {
			logutil.LogEvent(logger, e)
		})
	if err != nil {
		log.Error(err)
		os.Exit(1)
	}
}
// TestNewRestartableResultReader tests the correct error handling e.g.
// if the connection to a tablet fails due to a canceled context.
func TestNewRestartableResultReader(t *testing.T) {
	wantErr := errors.New("restartable_result_reader_test.go: context canceled")

	tabletconn.RegisterDialer("fake_dialer", func(tablet *topodatapb.Tablet, timeout time.Duration) (tabletconn.TabletConn, error) {
		return nil, wantErr
	})
	protocol := flag.CommandLine.Lookup("tablet_protocol").Value.String()
	flag.Set("tablet_protocol", "fake_dialer")
	// Restore the previous flag value after the test.
	defer flag.Set("tablet_protocol", protocol)

	// Create dependencies e.g. a "singleTabletProvider" instance.
	ts := zktestserver.New(t, []string{"cell1"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())

	alias := &topodatapb.TabletAlias{
		Cell: "cell1",
		Uid:  1,
	}
	tablet := &topodatapb.Tablet{
		Keyspace: "ks1",
		Shard:    "-80",
		Alias:    alias,
	}
	ctx := context.Background()
	if err := ts.CreateTablet(ctx, tablet); err != nil {
		t.Fatalf("CreateTablet failed: %v", err)
	}

	tp := newSingleTabletProvider(ctx, ts, alias)
	_, err := NewRestartableResultReader(ctx, wr.Logger(), tp, nil /* td */, chunk{}, false)
	if err == nil || !strings.Contains(err.Error(), wantErr.Error()) {
		t.Fatalf("NewRestartableResultReader() should have failed because the context is canceled: %v", err)
	}
}
// ExecuteVtctlCommand is part of the pb.VtctlServer interface
func (s *VtctlServer) ExecuteVtctlCommand(args *pb.ExecuteVtctlCommandRequest, stream pbs.Vtctl_ExecuteVtctlCommandServer) (err error) {
	defer servenv.HandlePanic("vtctl", &err)

	// create a logger, send the result back to the caller
	logstream := logutil.NewChannelLogger(10)
	logger := logutil.NewTeeLogger(logstream, logutil.NewConsoleLogger())

	// send logs to the caller
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		for e := range logstream {
			// Note we don't interrupt the loop here, as
			// we still need to flush and finish the
			// command, even if the channel to the client
			// has been broken. We'll just keep trying.
			stream.Send(&pb.ExecuteVtctlCommandResponse{
				Event: logutil.LoggerEventToProto(&e),
			})
		}
		wg.Done()
	}()

	// create the wrangler
	wr := wrangler.New(logger, s.ts, tmclient.NewTabletManagerClient(), time.Duration(args.LockTimeout))

	// execute the command
	err = vtctl.RunCommand(stream.Context(), wr, args.Args)

	// close the log channel, and wait for all pending log events to be sent
	close(logstream)
	wg.Wait()

	return err
}
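// The drain-and-flush pattern above (consume a log channel in a goroutine,
// close the channel when the command finishes, then wait on a WaitGroup so no
// events are lost) can be sketched with the standard library alone. This is a
// minimal illustration, not the Vitess API; the channel of strings stands in
// for logutil.NewChannelLogger.
package main

import (
	"fmt"
	"sync"
)

func main() {
	// logstream stands in for the channel logger's buffered event channel.
	logstream := make(chan string, 10)

	var wg sync.WaitGroup
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Keep draining until the channel is closed, even if sending to
		// the client were to fail, so the command itself is never interrupted.
		for e := range logstream {
			fmt.Println("send to client:", e)
		}
	}()

	// The command logs a few events while it runs.
	logstream <- "step 1 done"
	logstream <- "step 2 done"

	// Close the channel and wait for the drain goroutine to flush everything.
	close(logstream)
	wg.Wait()
}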
// MultiSnapshot takes a multi-part snapshot
// Should be called under RpcWrapLockAction.
func (agent *ActionAgent) MultiSnapshot(args *actionnode.MultiSnapshotArgs, logger logutil.Logger) (*actionnode.MultiSnapshotReply, error) {
	tablet, err := agent.TopoServer.GetTablet(agent.TabletAlias)
	if err != nil {
		return nil, err
	}
	ki, err := agent.TopoServer.GetKeyspace(tablet.Keyspace)
	if err != nil {
		return nil, err
	}

	if tablet.Type != topo.TYPE_BACKUP {
		return nil, fmt.Errorf("expected backup type, not %v", tablet.Type)
	}

	// create the loggers: tee to console and source
	l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)

	filenames, err := agent.Mysqld.CreateMultiSnapshot(l, args.KeyRanges, tablet.DbName(), ki.ShardingColumnName, ki.ShardingColumnType, tablet.Addr(), false, args.Concurrency, args.Tables, args.ExcludeTables, args.SkipSlaveRestart, args.MaximumFilesize, agent.hookExtraEnv())
	if err != nil {
		return nil, err
	}

	sr := &actionnode.MultiSnapshotReply{ManifestPaths: filenames}
	if tablet.Parent.Uid == topo.NO_TABLET {
		// If this is a master, this will be the new parent.
		// FIXME(msolomon) this doesn't work in hierarchical replication.
		sr.ParentAlias = tablet.Alias
	} else {
		sr.ParentAlias = tablet.Parent
	}
	return sr, nil
}
// ExecuteVtctlCommand is the server side method that will execute the query,
// and stream the results.
func (s *VtctlServer) ExecuteVtctlCommand(context context.Context, query *gorpcproto.ExecuteVtctlCommandArgs, sendReply func(interface{}) error) error {
	// create a logger, send the result back to the caller
	logstream := logutil.NewChannelLogger(10)
	logger := logutil.NewTeeLogger(logstream, logutil.NewConsoleLogger())

	// send logs to the caller
	wg := sync.WaitGroup{}
	wg.Add(1)
	go func() {
		for e := range logstream {
			// Note we don't interrupt the loop here, as
			// we still need to flush and finish the
			// command, even if the channel to the client
			// has been broken. We'll just keep trying.
			sendReply(&e)
		}
		wg.Done()
	}()

	// create the wrangler
	wr := wrangler.New(logger, s.ts, query.ActionTimeout, query.LockTimeout)

	// execute the command
	err := vtctl.RunCommand(wr, query.Args)

	// close the log channel, and wait for all pending log events to be sent
	close(logstream)
	wg.Wait()

	return err
}
// TestInitMasterShardChecks makes sure the safety checks work
func TestInitMasterShardChecks(t *testing.T) {
	ctx := context.Background()
	db := fakesqldb.Register()
	ts := zktestserver.New(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())

	master := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)

	// InitShardMaster with an unknown tablet
	if err := wr.InitShardMaster(ctx, master.Tablet.Keyspace, master.Tablet.Shard, &topodatapb.TabletAlias{
		Cell: master.Tablet.Alias.Cell,
		Uid:  master.Tablet.Alias.Uid + 1,
	}, false /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "is not in the shard") {
		t.Errorf("InitShardMaster with unknown alias returned wrong error: %v", err)
	}

	// InitShardMaster with two masters in the shard, no force flag
	// (master2 needs to run InitTablet with -force, as it is the second
	// master in the same shard)
	master2 := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_MASTER, db, ForceInitTablet())
	if err := wr.InitShardMaster(ctx, master2.Tablet.Keyspace, master2.Tablet.Shard, master2.Tablet.Alias, false /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "is not the only master in the shard") {
		t.Errorf("InitShardMaster with two masters returned wrong error: %v", err)
	}

	// InitShardMaster where the new master fails (use force flag
	// as we have 2 masters). We force the failure by making the
	// SQL commands executed on the master unexpected by the test fixture
	master.StartActionLoop(t, wr)
	defer master.StopActionLoop(t)
	master2.StartActionLoop(t, wr)
	defer master2.StopActionLoop(t)
	if err := wr.InitShardMaster(ctx, master.Tablet.Keyspace, master.Tablet.Shard, master.Tablet.Alias, true /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "unexpected extra query") {
		t.Errorf("InitShardMaster with new master failing in new master InitMaster returned wrong error: %v", err)
	}
}
func TestShardExternallyReparentedFailedOldMaster(t *testing.T) {
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)

	// Create an old master, a new master, and a good slave
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA, TabletParent(oldMaster.Tablet.Alias))
	goodSlave := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA, TabletParent(oldMaster.Tablet.Alias))

	// Reparent to a replica, and pretend the old master is not responding

	// On the elected master, we will respond to
	// TABLET_ACTION_SLAVE_WAS_PROMOTED
	newMaster.FakeMysqlDaemon.MasterAddr = ""
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only get a
	// TABLET_ACTION_SLAVE_WAS_RESTARTED call, let's just not
	// respond to it at all

	// On the good slave, we will respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED.
	goodSlave.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
	goodSlave.StartActionLoop(t, wr)
	defer goodSlave.StopActionLoop(t)

	// The reparent should work as expected here
	t.Logf("ShardExternallyReparented(new master) expecting success")
	if err := wr.ShardExternallyReparented("test_keyspace", "0", newMaster.Tablet.Alias); err != nil {
		t.Fatalf("ShardExternallyReparented(replica) failed: %v", err)
	}

	// Now double-check the serving graph is good.
	// Should only have one good replica left.
	addrs, err := ts.GetEndPoints("cell1", "test_keyspace", "0", topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints failed at the end: %v", err)
	}
	if len(addrs.Entries) != 1 {
		t.Fatalf("GetEndPoints has too many entries: %v", addrs)
	}

	// check the old master was converted to spare
	tablet, err := ts.GetTablet(oldMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet(%v) failed: %v", oldMaster.Tablet.Alias, err)
	}
	if tablet.Type != topo.TYPE_SPARE {
		t.Fatalf("old master should be spare but is: %v", tablet.Type)
	}
	if tablet.Parent != newMaster.Tablet.Alias {
		t.Fatalf("old master has the wrong master, got %v expected %v", tablet.Parent, newMaster.Tablet.Alias)
	}
}
func init() {
	logger := logutil.NewConsoleLogger()
	flag.CommandLine.SetOutput(logutil.NewLoggerWriter(logger))
	flag.Usage = func() {
		logger.Printf("Usage: %s [global parameters] command [command parameters]\n", os.Args[0])
		logger.Printf("\nThe global optional parameters are:\n")
		flag.PrintDefaults()
		logger.Printf("\nThe commands are listed below, sorted by group. Use '%s <command> -h' for more help.\n\n", os.Args[0])
		vtctl.PrintAllCommands(logger)
	}
}
func TestTabletExecutorOpenWithEmptyMasterAlias(t *testing.T) {
	ft := newFakeTopo()
	ft.Impl.(*fakeTopo).WithEmptyMasterAlias = true
	wr := wrangler.New(logutil.NewConsoleLogger(), ft, newFakeTabletManagerClient())
	executor := NewTabletExecutor(wr, testWaitSlaveTimeout)
	ctx := context.Background()

	if err := executor.Open(ctx, "test_keyspace"); err == nil {
		t.Fatalf("executor.Open() = nil, want error")
	}
	executor.Close()
}
// Backup takes a db backup and sends it to the BackupStorage
func (agent *ActionAgent) Backup(ctx context.Context, concurrency int, logger logutil.Logger) error {
	if err := agent.lock(ctx); err != nil {
		return err
	}
	defer agent.unlock()

	// update our type to BACKUP
	tablet, err := agent.TopoServer.GetTablet(ctx, agent.TabletAlias)
	if err != nil {
		return err
	}
	if tablet.Type == topodatapb.TabletType_MASTER {
		return fmt.Errorf("type MASTER cannot take backup, if you really need to do this, restart vttablet in replica mode")
	}
	originalType := tablet.Type
	if _, err := topotools.ChangeType(ctx, agent.TopoServer, tablet.Alias, topodatapb.TabletType_BACKUP); err != nil {
		return err
	}

	// let's update our internal state (stop query service and other things)
	if err := agent.refreshTablet(ctx, "before backup"); err != nil {
		return err
	}

	// create the loggers: tee to console and source
	l := logutil.NewTeeLogger(logutil.NewConsoleLogger(), logger)

	// now we can run the backup
	dir := fmt.Sprintf("%v/%v", tablet.Keyspace, tablet.Shard)
	name := fmt.Sprintf("%v.%v", time.Now().UTC().Format("2006-01-02.150405"), topoproto.TabletAliasString(tablet.Alias))
	returnErr := mysqlctl.Backup(ctx, agent.MysqlDaemon, l, dir, name, concurrency, agent.hookExtraEnv())

	// change our type back to the original value
	_, err = topotools.ChangeType(ctx, agent.TopoServer, tablet.Alias, originalType)
	if err != nil {
		// failure in changing the topology type is probably worse,
		// so returning that (we logged the backup error anyway)
		if returnErr != nil {
			l.Errorf("mysql backup command returned error: %v", returnErr)
		}
		returnErr = err
	}

	// let's update our internal state (start query service and other things)
	if err := agent.refreshTablet(ctx, "after backup"); err != nil {
		return err
	}

	// and re-run health check to be sure to capture any replication delay
	agent.runHealthCheckLocked()

	return returnErr
}
func TestSchemaManagerExecutorExecuteFail(t *testing.T) {
	controller := newFakeController(
		[]string{"create table test_table (pk int);"}, false, false, false)
	wr := wrangler.New(logutil.NewConsoleLogger(), newFakeTopo(), newFakeTabletManagerClient())
	executor := NewTabletExecutor(wr, testWaitSlaveTimeout)
	ctx := context.Background()

	err := Run(ctx, controller, executor)
	if err == nil {
		t.Fatalf("run schema change should fail due to executor.Execute fail")
	}
}
// rebuildShardIfNeeded will rebuild the serving graph if we need to
func (agent *ActionAgent) rebuildShardIfNeeded(tablet *topo.TabletInfo, targetTabletType topo.TabletType) error {
	if topo.IsInServingGraph(targetTabletType) {
		// TODO: interrupted may need to be a global one closed when we exit
		interrupted := make(chan struct{})

		// no need to take the shard lock in this case
		if err := topotools.RebuildShard(logutil.NewConsoleLogger(), agent.TopoServer, tablet.Keyspace, tablet.Shard, []string{tablet.Alias.Cell}, agent.LockTimeout, interrupted); err != nil {
			return fmt.Errorf("topotools.RebuildShard returned an error: %v", err)
		}
	}
	return nil
}
func TestTabletData(t *testing.T) {
	db := fakesqldb.Register()
	ts := zktestserver.New(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())

	if err := ts.CreateKeyspace(context.Background(), "ks", &topodatapb.Keyspace{
		ShardingColumnName: "keyspace_id",
		ShardingColumnType: topodatapb.KeyspaceIdType_UINT64,
	}); err != nil {
		t.Fatalf("CreateKeyspace failed: %v", err)
	}

	tablet1 := testlib.NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db, testlib.TabletKeyspaceShard(t, "ks", "-80"))
	tablet1.StartActionLoop(t, wr)
	defer tablet1.StopActionLoop(t)
	shsq := newStreamHealthTabletServer(t)
	grpcqueryservice.Register(tablet1.RPCServer, shsq)

	thc := newTabletHealthCache(ts)

	stats := &querypb.RealtimeStats{
		HealthError:         "testHealthError",
		SecondsBehindMaster: 72,
		CpuUsage:            1.1,
	}

	// Keep broadcasting until the first result goes through.
	stop := make(chan struct{})
	go func() {
		for {
			select {
			case <-stop:
				return
			default:
				shsq.BroadcastHealth(42, stats)
			}
		}
	}()

	// Start streaming and wait for the first result.
	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	result, err := thc.Get(ctx, tablet1.Tablet.Alias)
	cancel()
	close(stop)

	if err != nil {
		t.Fatalf("thc.Get failed: %v", err)
	}
	if got, want := result.RealtimeStats, stats; !proto.Equal(got, want) {
		t.Errorf("RealtimeStats = %#v, want %#v", got, want)
	}
}
func main() {
	defer exit.RecoverAll()
	defer logutil.Flush()

	flag.Parse()
	args := flag.Args()
	if len(args) == 0 {
		flag.Usage()
		exit.Return(1)
	}
	action := args[0]
	installSignalHandlers()

	startMsg := fmt.Sprintf("USER=%v SUDO_USER=%v %v", os.Getenv("USER"), os.Getenv("SUDO_USER"), strings.Join(os.Args, " "))
	if syslogger, err := syslog.New(syslog.LOG_INFO, "vtctl "); err == nil {
		syslogger.Info(startMsg)
	} else {
		log.Warningf("cannot connect to syslog: %v", err)
	}

	topoServer := topo.GetServer()
	defer topo.CloseServers()

	wr := wrangler.New(logutil.NewConsoleLogger(), topoServer, *waitTime, *lockWaitTimeout)

	actionPath, err := vtctl.RunCommand(wr, args)
	switch err {
	case vtctl.ErrUnknownCommand:
		flag.Usage()
		exit.Return(1)
	case nil:
		// keep going
	default:
		log.Errorf("action failed: %v %v", action, err)
		exit.Return(255)
	}

	if actionPath != "" {
		if *noWaitForAction {
			fmt.Println(actionPath)
		} else {
			err := wr.WaitForCompletion(actionPath)
			if err != nil {
				log.Error(err.Error())
				exit.Return(255)
			} else {
				log.Infof("action completed: %v", actionPath)
			}
		}
	}
}
// New creates a topology fixture.
func New(t *testing.T, cells []string) *Fixture {
	ts := zktopo.NewTestServer(t, cells)

	wr := wrangler.New(logutil.NewConsoleLogger(), ts, 1*time.Second, 1*time.Second)

	return &Fixture{
		T:        t,
		Topo:     ts,
		Wrangler: wr,
		done:     make(chan struct{}, 1),
		tablets:  make(map[int]*tabletPack),
	}
}
func TestTabletExternallyReparentedFailedOldMaster(t *testing.T) {
	tabletmanager.SetReparentFlags(time.Minute /* finalizeTimeout */)

	ctx := context.Background()
	db := fakesqldb.Register()
	ts := zktestserver.New(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())

	// Create an old master, a new master, and a good slave.
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topodatapb.TabletType_MASTER, db)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_REPLICA, db)
	goodSlave := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)

	// Reparent to a replica, and pretend the old master is not responding.

	// On the elected master, we will respond to
	// TabletActionSlaveWasPromoted
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only get a
	// TabletActionSlaveWasRestarted call, let's just not
	// respond to it at all

	// On the good slave, we will respond to
	// TabletActionSlaveWasRestarted.
	goodSlave.StartActionLoop(t, wr)
	defer goodSlave.StopActionLoop(t)

	// The reparent should work as expected here
	t.Logf("TabletExternallyReparented(new master) expecting success")
	tmc := tmclient.NewTabletManagerClient()
	ti, err := ts.GetTablet(ctx, newMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	waitID := makeWaitID()
	if err := tmc.TabletExternallyReparented(context.Background(), ti.Tablet, waitID); err != nil {
		t.Fatalf("TabletExternallyReparented(replica) failed: %v", err)
	}
	waitForExternalReparent(t, waitID)

	// check the old master was converted to replica
	tablet, err := ts.GetTablet(ctx, oldMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet(%v) failed: %v", oldMaster.Tablet.Alias, err)
	}
	if tablet.Type != topodatapb.TabletType_REPLICA {
		t.Fatalf("old master should be replica but is: %v", tablet.Type)
	}
}
func snapshotCmd(mysqld *mysqlctl.Mysqld, subFlags *flag.FlagSet, args []string) {
	concurrency := subFlags.Int("concurrency", 4, "how many compression jobs to run simultaneously")
	subFlags.Parse(args)
	if subFlags.NArg() != 1 {
		log.Fatalf("Command snapshot requires <db name>")
	}

	filename, _, _, err := mysqld.CreateSnapshot(logutil.NewConsoleLogger(), subFlags.Arg(0), tabletAddr, false, *concurrency, false, nil)
	if err != nil {
		log.Fatalf("snapshot failed: %v", err)
	} else {
		log.Infof("manifest location: %v", filename)
	}
}
func multiRestoreCmd(mysqld *mysqlctl.Mysqld, subFlags *flag.FlagSet, args []string) {
	starts := subFlags.String("starts", "", "starts of the key range")
	ends := subFlags.String("ends", "", "ends of the key range")
	fetchRetryCount := subFlags.Int("fetch_retry_count", 3, "how many times to retry a failed transfer")
	concurrency := subFlags.Int("concurrency", 8, "how many concurrent db inserts to run simultaneously")
	fetchConcurrency := subFlags.Int("fetch_concurrency", 4, "how many files to fetch simultaneously")
	insertTableConcurrency := subFlags.Int("insert_table_concurrency", 4, "how many myisam tables to load into a single destination table simultaneously")
	strategy := subFlags.String("strategy", "", "which strategy to use for restore, can contain:\n"+
		"    skipAutoIncrement(TTT): we won't add the AUTO_INCREMENT back to that table\n"+
		"    delayPrimaryKey: we won't add the primary key until after the table is populated\n"+
		"    delaySecondaryIndexes: we won't add the secondary indexes until after the table is populated\n"+
		"    useMyIsam: create the table as MyISAM, then convert it to InnoDB after population\n"+
		"    writeBinLogs: write all operations to the binlogs")

	subFlags.Parse(args)
	if subFlags.NArg() < 2 {
		log.Fatalf("multirestore requires <destination_dbname> <source_host>[/<source_dbname>]... %v", args)
	}

	startArray := strings.Split(*starts, ",")
	endArray := strings.Split(*ends, ",")
	if len(startArray) != len(endArray) || len(startArray) != subFlags.NArg()-1 {
		log.Fatalf("Need as many starts and ends as source URLs")
	}

	keyRanges := make([]key.KeyRange, len(startArray))
	for i, s := range startArray {
		var err error
		keyRanges[i], err = key.ParseKeyRangeParts(s, endArray[i])
		if err != nil {
			log.Fatalf("Invalid start or end: %v", err)
		}
	}

	dbName, dbis := subFlags.Arg(0), subFlags.Args()[1:]
	sources := make([]*url.URL, len(dbis))
	for i, dbi := range dbis {
		if !strings.HasPrefix(dbi, "vttp://") && !strings.HasPrefix(dbi, "http://") {
			dbi = "vttp://" + dbi
		}
		dbUrl, err := url.Parse(dbi)
		if err != nil {
			log.Fatalf("incorrect source url: %v", err)
		}
		sources[i] = dbUrl
	}
	if err := mysqld.MultiRestore(logutil.NewConsoleLogger(), dbName, keyRanges, sources, nil, *concurrency, *fetchConcurrency, *insertTableConcurrency, *fetchRetryCount, *strategy); err != nil {
		log.Fatalf("multirestore failed: %v", err)
	}
}
func snapshotSourceStartCmd(mysqld *mysqlctl.Mysqld, subFlags *flag.FlagSet, args []string) {
	concurrency := subFlags.Int("concurrency", 4, "how many checksum jobs to run simultaneously")
	subFlags.Parse(args)
	if subFlags.NArg() != 1 {
		log.Fatalf("Command snapshotsourcestart requires <db name>")
	}

	filename, slaveStartRequired, readOnly, err := mysqld.CreateSnapshot(logutil.NewConsoleLogger(), subFlags.Arg(0), tabletAddr, false, *concurrency, true, nil)
	if err != nil {
		log.Fatalf("snapshot failed: %v", err)
	} else {
		log.Infof("manifest location: %v", filename)
		log.Infof("slave start required: %v", slaveStartRequired)
		log.Infof("read only: %v", readOnly)
	}
}
func TestReparentTablet(t *testing.T) {
	ctx := context.Background()
	db := fakesqldb.Register()
	ts := zktestserver.New(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient())

	// create shard and tablets
	if err := ts.CreateShard(ctx, "test_keyspace", "0"); err != nil {
		t.Fatalf("CreateShard failed: %v", err)
	}
	master := NewFakeTablet(t, wr, "cell1", 1, topodatapb.TabletType_MASTER, db)
	slave := NewFakeTablet(t, wr, "cell1", 2, topodatapb.TabletType_REPLICA, db)

	// mark the master inside the shard
	si, err := ts.GetShard(ctx, "test_keyspace", "0")
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.MasterAlias = master.Tablet.Alias
	if err := ts.UpdateShard(ctx, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// master action loop (to initialize host and port)
	master.StartActionLoop(t, wr)
	defer master.StopActionLoop(t)

	// slave loop
	slave.FakeMysqlDaemon.SetMasterCommandsInput = fmt.Sprintf("%v:%v", master.Tablet.Hostname, master.Tablet.PortMap["mysql"])
	slave.FakeMysqlDaemon.SetMasterCommandsResult = []string{"set master cmd 1"}
	slave.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"set master cmd 1",
	}
	slave.StartActionLoop(t, wr)
	defer slave.StopActionLoop(t)

	// run ReparentTablet
	if err := wr.ReparentTablet(ctx, slave.Tablet.Alias); err != nil {
		t.Fatalf("ReparentTablet failed: %v", err)
	}

	// check what was run
	if err := slave.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("slave.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}

	checkSemiSyncEnabled(t, false, true, slave)
}