func TestCopySchemaShard(t *testing.T) {
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)

	sourceMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER,
		TabletKeyspaceShard(t, "ks", "-80"))
	sourceRdonly := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_RDONLY,
		TabletKeyspaceShard(t, "ks", "-80"),
		TabletParent(sourceMaster.Tablet.Alias))

	destinationMaster := NewFakeTablet(t, wr, "cell1", 10, topo.TYPE_MASTER,
		TabletKeyspaceShard(t, "ks", "-40"))
	// One destination rdonly, so we know that schema copies propagate from masters.
	destinationRdonly := NewFakeTablet(t, wr, "cell1", 11, topo.TYPE_RDONLY,
		TabletKeyspaceShard(t, "ks", "-40"),
		TabletParent(destinationMaster.Tablet.Alias))

	for _, ft := range []*FakeTablet{sourceMaster, sourceRdonly, destinationMaster, destinationRdonly} {
		ft.StartActionLoop(t, wr)
		defer ft.StopActionLoop(t)
	}

	sourceRdonly.FakeMysqlDaemon.Schema = &myproto.SchemaDefinition{
		DatabaseSchema: "CREATE DATABASE `{{.DatabaseName}}` /*!40100 DEFAULT CHARACTER SET utf8 */",
		TableDefinitions: []*myproto.TableDefinition{
			&myproto.TableDefinition{
				Name: "table1",
				// The CREATE statement must match the table name above.
				Schema: "CREATE TABLE `table1` (\n `id` bigint(20) NOT NULL AUTO_INCREMENT,\n `msg` varchar(64) DEFAULT NULL,\n `keyspace_id` bigint(20) unsigned NOT NULL,\n PRIMARY KEY (`id`),\n KEY `by_msg` (`msg`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8",
				Type:   myproto.TABLE_BASE_TABLE,
			},
			&myproto.TableDefinition{
				Name:   "view1",
				Schema: "CREATE TABLE `view1` (\n `id` bigint(20) NOT NULL AUTO_INCREMENT,\n `msg` varchar(64) DEFAULT NULL,\n `keyspace_id` bigint(20) unsigned NOT NULL,\n PRIMARY KEY (`id`),\n KEY `by_msg` (`msg`)\n) ENGINE=InnoDB DEFAULT CHARSET=utf8",
				Type:   myproto.TABLE_VIEW,
			},
		},
	}
	destinationMaster.FakeMysqlDaemon.DbaConnectionFactory = DestinationsFactory(t)
	destinationRdonly.FakeMysqlDaemon.DbaConnectionFactory = DestinationsFactory(t)

	if err := wr.CopySchemaShard(sourceRdonly.Tablet.Alias, nil, nil, true, "ks", "-40"); err != nil {
		t.Fatalf("CopySchemaShard failed: %v", err)
	}
}
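// DestinationsFactory is a test helper defined elsewhere in this package.
// Purely as an illustrative sketch (the type and method below are
// assumptions for this example, not the real Vitess API), such a factory
// usually hands out a fake DBA connection that records the DDL it receives,
// so the test can assert that CopySchemaShard replayed the CREATE DATABASE
// and CREATE TABLE statements on each destination tablet:
type fakeDDLRecorder struct {
	queries []string // DDL statements received, in order
}

// ExecuteFetch records the statement instead of sending it to mysql.
func (c *fakeDDLRecorder) ExecuteFetch(query string) error {
	c.queries = append(c.queries, query)
	return nil
}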
// TestTabletExternallyReparentedWithDifferentMysqlPort makes sure
// that if mysql is restarted on the master-elect tablet and has a different
// port, we pick it up correctly.
func TestTabletExternallyReparentedWithDifferentMysqlPort(t *testing.T) {
	ts := zktopo.NewTestServer(t, []string{"cell1"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)

	// Create an old master, a new master, and a good slave.
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))
	goodSlave := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))

	// Now we're restarting mysql on a different port, 3301->3303,
	// but without updating the Tablet record in topology.

	// On the elected master, we will respond to
	// TABLET_ACTION_SLAVE_WAS_PROMOTED, so we need a MysqlDaemon
	// that returns no master, and the new port (as returned by mysql).
	newMaster.FakeMysqlDaemon.MasterAddr = ""
	newMaster.FakeMysqlDaemon.MysqlPort = 3303
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED and point to the new mysql port.
	oldMaster.FakeMysqlDaemon.MasterAddr = "101.0.0.1:3303"
	oldMaster.StartActionLoop(t, wr)
	defer oldMaster.StopActionLoop(t)

	// On the good slave, we will respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED and point to the new mysql port.
	goodSlave.FakeMysqlDaemon.MasterAddr = "101.0.0.1:3303"
	goodSlave.StartActionLoop(t, wr)
	defer goodSlave.StopActionLoop(t)

	// This tests the good case, where everything works as planned.
	t.Logf("TabletExternallyReparented(new master) expecting success")
	tmc := tmclient.NewTabletManagerClient()
	ti, err := ts.GetTablet(newMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil {
		t.Fatalf("TabletExternallyReparented(replica) failed: %v", err)
	}
}
// CreateTopoServer returns the test topo server, properly configured.
func CreateTopoServer(t *testing.T) topo.Server {
	return zktopo.NewTestServer(t, []string{"cell1", "cell2"})
}
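// A minimal usage sketch for CreateTopoServer (the test name below is
// hypothetical; the wrangler setup mirrors the other tests in this file):
func TestTopoServerUsageSketch(t *testing.T) {
	ts := CreateTopoServer(t)
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)
	_ = wr // drive tablet actions against the two-cell test topology here
}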
func TestRebuildShardRace(t *testing.T) {
	ctx := context.Background()
	cells := []string{"test_cell"}
	logger := logutil.NewMemoryLogger()
	timeout := 10 * time.Second
	interrupted := make(chan struct{})

	// Set up topology.
	ts := zktopo.NewTestServer(t, cells)
	f := faketopo.New(t, logger, ts, cells)
	defer f.TearDown()
	keyspace := faketopo.TestKeyspace
	shard := faketopo.TestShard
	master := f.AddTablet(1, "test_cell", topo.TYPE_MASTER, nil)
	f.AddTablet(2, "test_cell", topo.TYPE_REPLICA, master)

	// Do an initial rebuild.
	if _, err := RebuildShard(ctx, logger, f.Topo, keyspace, shard, cells, timeout, interrupted); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Check initial state.
	ep, err := ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_MASTER)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}
	ep, err = ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints: %v", err)
	}
	if got, want := len(ep.Entries), 1; got != want {
		t.Fatalf("len(Entries) = %v, want %v", got, want)
	}

	// Install a hook that hands out locks out of order to simulate a race.
	trigger := make(chan struct{})
	stalled := make(chan struct{})
	done := make(chan struct{})
	wait := make(chan bool, 2)
	wait <- true  // first rebuild waits for the trigger
	wait <- false // second rebuild doesn't wait
	ts.HookLockSrvShardForAction = func() {
		if <-wait {
			close(stalled)
			<-trigger
		}
	}

	// Make a change and start a rebuild that will stall when it tries to get
	// the SrvShard lock.
	masterInfo := f.GetTablet(1)
	masterInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ctx, ts, masterInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	go func() {
		defer close(done)
		// t.Fatalf must only be called from the main test goroutine,
		// so report failures here with t.Errorf instead.
		if _, err := RebuildShard(ctx, logger, f.Topo, keyspace, shard, cells, timeout, interrupted); err != nil {
			t.Errorf("RebuildShard: %v", err)
		}
	}()

	// Wait for the first rebuild to stall.
	<-stalled

	// While the first rebuild is stalled, make another change and start a
	// rebuild that doesn't stall.
	replicaInfo := f.GetTablet(2)
	replicaInfo.Type = topo.TYPE_SPARE
	if err := topo.UpdateTablet(ctx, ts, replicaInfo); err != nil {
		t.Fatalf("UpdateTablet: %v", err)
	}
	if _, err := RebuildShard(ctx, logger, f.Topo, keyspace, shard, cells, timeout, interrupted); err != nil {
		t.Fatalf("RebuildShard: %v", err)
	}

	// Now that the second rebuild is done, un-stall the first rebuild and wait
	// for it to finish.
	close(trigger)
	<-done

	// Check that the rebuild picked up both changes.
	if _, err := ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_MASTER); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("first change wasn't picked up by second rebuild")
	}
	if _, err := ts.GetEndPoints(cells[0], keyspace, shard, topo.TYPE_REPLICA); err == nil || !strings.Contains(err.Error(), "node doesn't exist") {
		t.Errorf("second change was overwritten by first rebuild finishing late")
	}
}
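// The wait/stalled/trigger choreography above is easy to miss, so here is
// the same pattern in isolation: a buffered channel is pre-loaded with one
// "stall" token and one "pass" token, so the first caller of the hook parks
// on the trigger channel while the second runs straight through. This is a
// standalone sketch with illustrative names, not part of the test above.
func demoStallFirstCaller() {
	trigger := make(chan struct{})
	stalled := make(chan struct{})
	wait := make(chan bool, 2)
	wait <- true  // first caller stalls
	wait <- false // second caller passes
	hook := func() {
		if <-wait {
			close(stalled) // signal that we are parked
			<-trigger      // wait to be released
		}
	}
	go hook()      // first caller: blocks until trigger is closed
	<-stalled      // wait until it is parked
	hook()         // second caller: returns immediately
	close(trigger) // release the first caller
}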
func testSplitClone(t *testing.T, strategy string) {
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)

	sourceMaster := testlib.NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER,
		testlib.TabletKeyspaceShard(t, "ks", "-80"))
	sourceRdonly1 := testlib.NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_RDONLY,
		testlib.TabletKeyspaceShard(t, "ks", "-80"),
		testlib.TabletParent(sourceMaster.Tablet.Alias))
	sourceRdonly2 := testlib.NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_RDONLY,
		testlib.TabletKeyspaceShard(t, "ks", "-80"),
		testlib.TabletParent(sourceMaster.Tablet.Alias))

	leftMaster := testlib.NewFakeTablet(t, wr, "cell1", 10, topo.TYPE_MASTER,
		testlib.TabletKeyspaceShard(t, "ks", "-40"))
	leftRdonly := testlib.NewFakeTablet(t, wr, "cell1", 11, topo.TYPE_RDONLY,
		testlib.TabletKeyspaceShard(t, "ks", "-40"),
		testlib.TabletParent(leftMaster.Tablet.Alias))

	rightMaster := testlib.NewFakeTablet(t, wr, "cell1", 20, topo.TYPE_MASTER,
		testlib.TabletKeyspaceShard(t, "ks", "40-80"))
	rightRdonly := testlib.NewFakeTablet(t, wr, "cell1", 21, topo.TYPE_RDONLY,
		testlib.TabletKeyspaceShard(t, "ks", "40-80"),
		testlib.TabletParent(rightMaster.Tablet.Alias))

	for _, ft := range []*testlib.FakeTablet{sourceMaster, sourceRdonly1, sourceRdonly2, leftMaster, leftRdonly, rightMaster, rightRdonly} {
		ft.StartActionLoop(t, wr)
		defer ft.StopActionLoop(t)
	}

	// Add the topo and schema data we'll need.
	if err := topo.CreateShard(ts, "ks", "80-"); err != nil {
		t.Fatalf("CreateShard(\"80-\") failed: %v", err)
	}
	if err := wr.SetKeyspaceShardingInfo("ks", "keyspace_id", key.KIT_UINT64, 4, false); err != nil {
		t.Fatalf("SetKeyspaceShardingInfo failed: %v", err)
	}
	if err := wr.RebuildKeyspaceGraph("ks", nil); err != nil {
		t.Fatalf("RebuildKeyspaceGraph failed: %v", err)
	}

	gwrk, err := NewSplitCloneWorker(wr, "cell1", "ks", "-80", nil, strategy,
		10 /*sourceReaderCount*/, 4 /*destinationPackCount*/,
		1 /*minTableSizeForSplit*/, 10 /*destinationWriterCount*/)
	if err != nil {
		// Fatalf, not Errorf: the type assertion below would panic on a nil worker.
		t.Fatalf("Worker creation failed: %v", err)
	}
	wrk := gwrk.(*SplitCloneWorker)

	for _, sourceRdonly := range []*testlib.FakeTablet{sourceRdonly1, sourceRdonly2} {
		sourceRdonly.FakeMysqlDaemon.Schema = &myproto.SchemaDefinition{
			DatabaseSchema: "",
			TableDefinitions: []*myproto.TableDefinition{
				&myproto.TableDefinition{
					Name:              "table1",
					Columns:           []string{"id", "msg", "keyspace_id"},
					PrimaryKeyColumns: []string{"id"},
					Type:              myproto.TABLE_BASE_TABLE,
					// This informs how many rows we can pack into a single insert.
					DataLength: 2048,
				},
			},
		}
		sourceRdonly.FakeMysqlDaemon.DbaConnectionFactory = SourceRdonlyFactory(t)
		sourceRdonly.FakeMysqlDaemon.CurrentSlaveStatus = &myproto.ReplicationStatus{
			Position: myproto.ReplicationPosition{
				GTIDSet: myproto.MariadbGTID{Domain: 12, Server: 34, Sequence: 5678},
			},
		}
		sourceRdonly.RpcServer.Register(&SqlQuery{t: t})
	}

	// We read 100 source rows. sourceReaderCount is set to 10, so
	// we'll have 100/10=10 rows per table chunk.
	// destinationPackCount is set to 4, so we take 4 source rows
	// at once. So we'll process 4 + 4 + 2 rows to get to 10.
	// That means 3 insert statements on each target (each
	// containing half of the rows, i.e. 2 + 2 + 1 rows). So 3 * 10
	// = 30 insert statements on each destination.
	leftMaster.FakeMysqlDaemon.DbaConnectionFactory = DestinationsFactory(t, 30)
	leftRdonly.FakeMysqlDaemon.DbaConnectionFactory = DestinationsFactory(t, 30)
	rightMaster.FakeMysqlDaemon.DbaConnectionFactory = DestinationsFactory(t, 30)
	rightRdonly.FakeMysqlDaemon.DbaConnectionFactory = DestinationsFactory(t, 30)

	wrk.Run()
	status := wrk.StatusAsText()
	t.Logf("Got status: %v", status)
	if wrk.err != nil || wrk.state != stateSCDone {
		t.Errorf("Worker run failed: err=%v, state=%v", wrk.err, wrk.state)
	}
}
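// A quick check of the arithmetic in the comment above, with a hypothetical
// helper name (only the numbers come from the test): 100 rows / 10 readers
// = 10 rows per chunk; packing 4 rows per insert gives ceil(10/4) = 3
// inserts per chunk; 3 * 10 chunks = 30 inserts per destination, which is
// why DestinationsFactory is told to expect 30.
func expectedInsertsPerDestination(rows, readerCount, packCount int) int {
	rowsPerChunk := rows / readerCount
	insertsPerChunk := (rowsPerChunk + packCount - 1) / packCount // ceil
	return insertsPerChunk * readerCount                          // one chunk per reader
}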
func TestTabletExternallyReparentedFailedOldMaster(t *testing.T) {
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)

	// Create an old master, a new master, and a good slave.
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))
	goodSlave := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))

	// Reparent to a replica, and pretend the old master is not responding.

	// On the elected master, we will respond to
	// TABLET_ACTION_SLAVE_WAS_PROMOTED.
	newMaster.FakeMysqlDaemon.MasterAddr = ""
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only get a
	// TABLET_ACTION_SLAVE_WAS_RESTARTED call; we deliberately never
	// start its action loop, so it doesn't respond at all.

	// On the good slave, we will respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED.
	goodSlave.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
	goodSlave.StartActionLoop(t, wr)
	defer goodSlave.StopActionLoop(t)

	// The reparent should work as expected here.
	t.Logf("TabletExternallyReparented(new master) expecting success")
	tmc := tmclient.NewTabletManagerClient()
	ti, err := ts.GetTablet(newMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil {
		t.Fatalf("TabletExternallyReparented(replica) failed: %v", err)
	}

	// Now double-check the serving graph is good.
	// Should only have one good replica left.
	addrs, err := ts.GetEndPoints("cell1", "test_keyspace", "0", topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints failed at the end: %v", err)
	}
	if len(addrs.Entries) != 1 {
		t.Fatalf("GetEndPoints has too many entries: %v", addrs)
	}

	// Check that the old master was converted to spare.
	tablet, err := ts.GetTablet(oldMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet(%v) failed: %v", oldMaster.Tablet.Alias, err)
	}
	if tablet.Type != topo.TYPE_SPARE {
		t.Fatalf("old master should be spare but is: %v", tablet.Type)
	}
	if tablet.Parent != newMaster.Tablet.Alias {
		t.Fatalf("old master has the wrong master, got %v expected %v", tablet.Parent, newMaster.Tablet.Alias)
	}
}
func TestTabletExternallyReparented(t *testing.T) {
	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second)

	// Create an old master, a new master, two good slaves, one bad slave.
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))
	goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))
	goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))
	badSlave := NewFakeTablet(t, wr, "cell1", 4, topo.TYPE_REPLICA,
		TabletParent(oldMaster.Tablet.Alias))

	// Add a new Cell to the Shard that doesn't map to any real topo cell,
	// to simulate a data center being unreachable.
	si, err := ts.GetShard("test_keyspace", "0")
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.Cells = append(si.Cells, "cell666")
	if err := topo.UpdateShard(ctx, ts, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// Slightly unrelated test: make sure we can find the tablets
	// even with a datacenter being down.
	tabletMap, err := topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell1"})
	if err != nil {
		t.Fatalf("GetTabletMapForShardByCell should have worked but got: %v", err)
	}
	master, err := topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
	if err != nil || master != oldMaster.Tablet.Alias {
		t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
	}
	slave1, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave1.Tablet.IPAddr, "vt", goodSlave1.Tablet.Portmap["vt"])
	if err != nil || slave1 != goodSlave1.Tablet.Alias {
		t.Fatalf("FindTabletByIPAddrAndPort(slave1) failed: %v %v", err, slave1)
	}
	slave2, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave2.Tablet.IPAddr, "vt", goodSlave2.Tablet.Portmap["vt"])
	if err != topo.ErrNoNode {
		t.Fatalf("FindTabletByIPAddrAndPort(slave2) worked: %v %v", err, slave2)
	}

	// Make sure the master is not exported in other cells.
	tabletMap, err = topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell2"})
	if err != nil {
		t.Fatalf("GetTabletMapForShardByCell failed: %v", err)
	}
	master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
	if err != topo.ErrNoNode {
		t.Fatalf("FindTabletByIPAddrAndPort(master) worked in cell2: %v %v", err, master)
	}

	tabletMap, err = topo.GetTabletMapForShard(ctx, ts, "test_keyspace", "0")
	if err != topo.ErrPartialResult {
		t.Fatalf("GetTabletMapForShard should have returned ErrPartialResult but got: %v", err)
	}
	master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"])
	if err != nil || master != oldMaster.Tablet.Alias {
		t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
	}

	// On the elected master, we will respond to
	// TABLET_ACTION_SLAVE_WAS_PROMOTED.
	newMaster.FakeMysqlDaemon.MasterAddr = ""
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED.
	oldMaster.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
	oldMaster.StartActionLoop(t, wr)
	defer oldMaster.StopActionLoop(t)

	// On the good slaves, we will respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED.
	goodSlave1.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
	goodSlave1.StartActionLoop(t, wr)
	defer goodSlave1.StopActionLoop(t)
	goodSlave2.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr()
	goodSlave2.StartActionLoop(t, wr)
	defer goodSlave2.StopActionLoop(t)

	// On the bad slave, we will respond to
	// TABLET_ACTION_SLAVE_WAS_RESTARTED with bad data.
	badSlave.FakeMysqlDaemon.MasterAddr = "234.0.0.1:3301"
	badSlave.StartActionLoop(t, wr)
	defer badSlave.StopActionLoop(t)

	// First test: reparent to the same master, make sure it works
	// as expected.
	tmc := tmclient.NewTabletManagerClient()
	ti, err := ts.GetTablet(oldMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil {
		t.Fatalf("TabletExternallyReparented(same master) should have worked: %v", err)
	}

	// Second test: reparent to a replica, and pretend the old
	// master is still good to go.
	// This tests a bad case: the new designated master is a slave,
	// but we should do what we're told anyway.
	ti, err = ts.GetTablet(goodSlave1.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil {
		t.Fatalf("TabletExternallyReparented(slave) error: %v", err)
	}

	// This tests the good case, where everything works as planned.
	t.Logf("TabletExternallyReparented(new master) expecting success")
	ti, err = ts.GetTablet(newMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil {
		t.Fatalf("TabletExternallyReparented(replica) failed: %v", err)
	}

	// Now double-check the serving graph is good.
	// Should only have one good replica left.
	addrs, err := ts.GetEndPoints("cell1", "test_keyspace", "0", topo.TYPE_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints failed at the end: %v", err)
	}
	if len(addrs.Entries) != 1 {
		t.Fatalf("GetEndPoints has too many entries: %v", addrs)
	}
}