// RefreshTablesByShard calls RefreshState on all the tables of a // given type in a shard. It would work for the master, but the // discovery wouldn't be very efficient. func (wr *Wrangler) RefreshTablesByShard(si *topo.ShardInfo, tabletType topo.TabletType, cells []string) error { wr.Logger().Infof("RefreshTablesByShard called on shard %v/%v", si.Keyspace(), si.ShardName()) tabletMap, err := topo.GetTabletMapForShardByCell(context.TODO(), wr.ts, si.Keyspace(), si.ShardName(), cells) switch err { case nil: // keep going case topo.ErrPartialResult: wr.Logger().Warningf("RefreshTablesByShard: got partial result for shard %v/%v, may not refresh all tablets everywhere", si.Keyspace(), si.ShardName()) default: return err } // ignore errors in this phase wg := sync.WaitGroup{} for _, ti := range tabletMap { if ti.Type != tabletType { continue } wg.Add(1) go func(ti *topo.TabletInfo) { wr.Logger().Infof("Calling RefreshState on tablet %v", ti.Alias) if err := wr.tmc.RefreshState(wr.ctx, ti); err != nil { wr.Logger().Warningf("RefreshTablesByShard: failed to refresh %v: %v", ti.Alias, err) } wg.Done() }(ti) } wg.Wait() return nil }
func TestTabletExternallyReparented(t *testing.T) { ctx := context.Background() ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"}) wr := wrangler.New(logutil.NewConsoleLogger(), ts, time.Minute, time.Second) // Create an old master, a new master, two good slaves, one bad slave oldMaster := NewFakeTablet(t, wr, "cell1", 0, topo.TYPE_MASTER) newMaster := NewFakeTablet(t, wr, "cell1", 1, topo.TYPE_REPLICA, TabletParent(oldMaster.Tablet.Alias)) goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, topo.TYPE_REPLICA, TabletParent(oldMaster.Tablet.Alias)) goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, topo.TYPE_REPLICA, TabletParent(oldMaster.Tablet.Alias)) badSlave := NewFakeTablet(t, wr, "cell1", 4, topo.TYPE_REPLICA, TabletParent(oldMaster.Tablet.Alias)) // Add a new Cell to the Shard, that doesn't map to any read topo cell, // to simulate a data center being unreachable. si, err := ts.GetShard("test_keyspace", "0") if err != nil { t.Fatalf("GetShard failed: %v", err) } si.Cells = append(si.Cells, "cell666") if err := topo.UpdateShard(ctx, ts, si); err != nil { t.Fatalf("UpdateShard failed: %v", err) } // Slightly unrelated test: make sure we can find the tablets // even with a datacenter being down. tabletMap, err := topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell1"}) if err != nil { t.Fatalf("GetTabletMapForShardByCell should have worked but got: %v", err) } master, err := topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"]) if err != nil || master != oldMaster.Tablet.Alias { t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master) } slave1, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave1.Tablet.IPAddr, "vt", goodSlave1.Tablet.Portmap["vt"]) if err != nil || slave1 != goodSlave1.Tablet.Alias { t.Fatalf("FindTabletByIPAddrAndPort(slave1) failed: %v %v", err, master) } slave2, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave2.Tablet.IPAddr, "vt", goodSlave2.Tablet.Portmap["vt"]) if err != topo.ErrNoNode { t.Fatalf("FindTabletByIPAddrAndPort(slave2) worked: %v %v", err, slave2) } // Make sure the master is not exported in other cells tabletMap, err = topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell2"}) master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"]) if err != topo.ErrNoNode { t.Fatalf("FindTabletByIPAddrAndPort(master) worked in cell2: %v %v", err, master) } tabletMap, err = topo.GetTabletMapForShard(ctx, ts, "test_keyspace", "0") if err != topo.ErrPartialResult { t.Fatalf("GetTabletMapForShard should have returned ErrPartialResult but got: %v", err) } master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.IPAddr, "vt", oldMaster.Tablet.Portmap["vt"]) if err != nil || master != oldMaster.Tablet.Alias { t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master) } // On the elected master, we will respond to // TABLET_ACTION_SLAVE_WAS_PROMOTED newMaster.FakeMysqlDaemon.MasterAddr = "" newMaster.StartActionLoop(t, wr) defer newMaster.StopActionLoop(t) // On the old master, we will only respond to // TABLET_ACTION_SLAVE_WAS_RESTARTED. oldMaster.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr() oldMaster.StartActionLoop(t, wr) defer oldMaster.StopActionLoop(t) // On the good slaves, we will respond to // TABLET_ACTION_SLAVE_WAS_RESTARTED. goodSlave1.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr() goodSlave1.StartActionLoop(t, wr) defer goodSlave1.StopActionLoop(t) goodSlave2.FakeMysqlDaemon.MasterAddr = newMaster.Tablet.MysqlIpAddr() goodSlave2.StartActionLoop(t, wr) defer goodSlave2.StopActionLoop(t) // On the bad slave, we will respond to // TABLET_ACTION_SLAVE_WAS_RESTARTED with bad data. badSlave.FakeMysqlDaemon.MasterAddr = "234.0.0.1:3301" badSlave.StartActionLoop(t, wr) defer badSlave.StopActionLoop(t) // First test: reparent to the same master, make sure it works // as expected. tmc := tmclient.NewTabletManagerClient() ti, err := ts.GetTablet(oldMaster.Tablet.Alias) if err != nil { t.Fatalf("GetTablet failed: %v", err) } if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil { t.Fatalf("TabletExternallyReparented(same master) should have worked") } // Second test: reparent to a replica, and pretend the old // master is still good to go. // This tests a bad case; the new designated master is a slave, // but we should do what we're told anyway ti, err = ts.GetTablet(goodSlave1.Tablet.Alias) if err != nil { t.Fatalf("GetTablet failed: %v", err) } if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil { t.Fatalf("TabletExternallyReparented(slave) error: %v", err) } // This tests the good case, where everything works as planned t.Logf("TabletExternallyReparented(new master) expecting success") ti, err = ts.GetTablet(newMaster.Tablet.Alias) if err != nil { t.Fatalf("GetTablet failed: %v", err) } if err := tmc.TabletExternallyReparented(wr.Context(), ti, ""); err != nil { t.Fatalf("TabletExternallyReparented(replica) failed: %v", err) } // Now double-check the serving graph is good. // Should only have one good replica left. addrs, err := ts.GetEndPoints("cell1", "test_keyspace", "0", topo.TYPE_REPLICA) if err != nil { t.Fatalf("GetEndPoints failed at the end: %v", err) } if len(addrs.Entries) != 1 { t.Fatalf("GetEndPoints has too many entries: %v", addrs) } }