// updateShardCellsAndMaster will update the 'Cells' and possibly
// MasterAlias records for the shard, if needed.
func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *pb.TabletAlias, tabletType pb.TabletType, force bool) error {
	// See if we need to update the Shard:
	// - add the tablet's cell to the shard's Cells if needed
	// - change the master if needed
	shardUpdateRequired := false
	if !si.HasCell(tabletAlias.Cell) {
		shardUpdateRequired = true
	}
	if tabletType == pb.TabletType_MASTER && !topo.TabletAliasEqual(si.MasterAlias, tabletAlias) {
		shardUpdateRequired = true
	}
	if !shardUpdateRequired {
		return nil
	}

	actionNode := actionnode.UpdateShard()
	keyspace := si.Keyspace()
	shard := si.ShardName()
	lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
	if err != nil {
		return err
	}

	// re-read the shard with the lock
	si, err = wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
	}

	// update it
	wasUpdated := false
	if !si.HasCell(tabletAlias.Cell) {
		si.Cells = append(si.Cells, tabletAlias.Cell)
		wasUpdated = true
	}
	if tabletType == pb.TabletType_MASTER && !topo.TabletAliasEqual(si.MasterAlias, tabletAlias) {
		if !topo.TabletAliasIsZero(si.MasterAlias) && !force {
			return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topo.TabletAliasString(si.MasterAlias), keyspace, shard))
		}
		si.MasterAlias = tabletAlias
		wasUpdated = true
	}

	if wasUpdated {
		// write it back
		if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
			return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
		}
	}

	// and unlock
	return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
}
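// exampleLockedShardUpdate is a hypothetical, minimal sketch (not called
// anywhere) that isolates the lock / re-read / mutate / write-back /
// unlock idiom used by updateShardCellsAndMaster above, and again by
// Scrap and the agent's InitTablet below, whenever the global shard
// record is touched. The mutation shown (adding a cell) is illustrative.
func (wr *Wrangler) exampleLockedShardUpdate(ctx context.Context, keyspace, shard, cell string) error {
	actionNode := actionnode.UpdateShard()
	lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
	if err != nil {
		return err
	}

	// Always re-read the shard record after taking the lock, so the
	// mutation applies to the latest version, not a stale copy.
	si, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
	}

	if !si.HasCell(cell) {
		si.Cells = append(si.Cells, cell)
		if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
			return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
		}
	}

	// unlockShard records the final status of the operation on the lock path.
	return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, nil)
}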
// ValidateVersionKeyspace validates all versions are the same in all
// tablets in a keyspace
func (wr *Wrangler) ValidateVersionKeyspace(ctx context.Context, keyspace string) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return err
	}

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	}
	sort.Strings(shards)
	if len(shards) == 1 {
		return wr.ValidateVersionShard(ctx, keyspace, shards[0])
	}

	// find the reference version using the first shard's master
	si, err := wr.ts.GetShard(ctx, keyspace, shards[0])
	if err != nil {
		return err
	}
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	}
	referenceAlias := si.MasterAlias
	log.Infof("Gathering version for reference master %v", topo.TabletAliasString(referenceAlias))
	referenceVersion, err := wr.GetVersion(ctx, referenceAlias)
	if err != nil {
		return err
	}

	// then diff all tablets against the reference, skipping the
	// reference master itself
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, shard := range shards {
		aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}
		for _, alias := range aliases {
			if topo.TabletAliasEqual(alias, si.MasterAlias) {
				continue
			}
			wg.Add(1)
			go wr.diffVersion(ctx, referenceVersion, referenceAlias, alias, &wg, &er)
		}
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	}
	return nil
}
// FIXME(msolomon) This validate presumes the master is up and running.
// Even when that isn't true, there are validation processes that might
// be valuable.
func (wr *Wrangler) validateShard(ctx context.Context, keyspace, shard string, pingTablets bool, wg *sync.WaitGroup, results chan<- error) {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.GetShard(%v, %v) failed: %v", keyspace, shard, err)
		return
	}

	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		results <- fmt.Errorf("TopologyServer.FindAllTabletAliasesInShard(%v, %v) failed: %v", keyspace, shard, err)
		return
	}

	tabletMap, _ := topo.GetTabletMap(ctx, wr.ts, aliases)

	var masterAlias *pb.TabletAlias
	for _, alias := range aliases {
		tabletInfo, ok := tabletMap[*alias]
		if !ok {
			results <- fmt.Errorf("tablet %v not found in map", alias)
			continue
		}
		if tabletInfo.Type == pb.TabletType_MASTER {
			if masterAlias != nil {
				results <- fmt.Errorf("shard %v/%v already has master %v but found other master %v", keyspace, shard, masterAlias, alias)
			} else {
				masterAlias = alias
			}
		}
	}

	if masterAlias == nil {
		results <- fmt.Errorf("no master for shard %v/%v", keyspace, shard)
	} else if !topo.TabletAliasEqual(shardInfo.MasterAlias, masterAlias) {
		results <- fmt.Errorf("master mismatch for shard %v/%v: found %v, expected %v", keyspace, shard, masterAlias, shardInfo.MasterAlias)
	}

	for _, alias := range aliases {
		wg.Add(1)
		go func(alias *pb.TabletAlias) {
			defer wg.Done()
			if err := topo.Validate(ctx, wr.ts, alias); err != nil {
				results <- fmt.Errorf("Validate(%v) failed: %v", alias, err)
			} else {
				wr.Logger().Infof("tablet %v is valid", alias)
			}
		}(alias)
	}

	if pingTablets {
		wr.validateReplication(ctx, shardInfo, tabletMap, results)
		wr.pingTablets(ctx, tabletMap, wg, results)
	}
}
// ValidateVersionShard validates all versions are the same in all
// tablets in a shard
func (wr *Wrangler) ValidateVersionShard(ctx context.Context, keyspace, shard string) error {
	si, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}

	// get version from the master, or error
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shard)
	}
	log.Infof("Gathering version for master %v", topo.TabletAliasString(si.MasterAlias))
	masterVersion, err := wr.GetVersion(ctx, si.MasterAlias)
	if err != nil {
		return err
	}

	// read all the aliases in the shard, that is all tablets that are
	// replicating from the master
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// then diff with all slaves
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		if topo.TabletAliasEqual(alias, si.MasterAlias) {
			continue
		}
		wg.Add(1)
		go wr.diffVersion(ctx, masterVersion, si.MasterAlias, alias, &wg, &er)
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Version diffs:\n%v", er.Error().Error())
	}
	return nil
}
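// exampleFanOutDiff is a hypothetical, minimal sketch (not called
// anywhere) of the fan-out pattern the Validate* methods above rely on:
// one goroutine per tablet, errors collected in a thread-safe
// AllErrorRecorder, and a single aggregated error returned once every
// goroutine is done. checkOne stands in for a per-tablet check such as
// diffVersion or diffSchema.
func exampleFanOutDiff(aliases []*pb.TabletAlias, checkOne func(*pb.TabletAlias) error) error {
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}
	for _, alias := range aliases {
		wg.Add(1)
		go func(alias *pb.TabletAlias) {
			defer wg.Done()
			// RecordError is safe to call from multiple goroutines.
			if err := checkOne(alias); err != nil {
				er.RecordError(err)
			}
		}(alias)
	}
	wg.Wait()
	if er.HasErrors() {
		return er.Error()
	}
	return nil
}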
// Scrap a tablet. If force is used, we write to topo.Server
// directly and don't remote-execute the command.
//
// If we scrap the master for a shard, we will clear its record
// from the Shard object (only if that was the right master)
func (wr *Wrangler) Scrap(ctx context.Context, tabletAlias topo.TabletAlias, force, skipRebuild bool) error {
	// load the tablet, see if we'll need to rebuild
	ti, err := wr.ts.GetTablet(ctx, tabletAlias)
	if err != nil {
		return err
	}
	rebuildRequired := ti.IsInServingGraph()
	wasMaster := ti.Type == topo.TYPE_MASTER

	if force {
		err = topotools.Scrap(ctx, wr.ts, ti.Alias, force)
	} else {
		err = wr.tmc.Scrap(ctx, ti)
	}
	if err != nil {
		return err
	}

	if !rebuildRequired {
		wr.Logger().Infof("Rebuild not required")
		return nil
	}
	if skipRebuild {
		wr.Logger().Warningf("Rebuild required, but skipping it")
		return nil
	}

	// update the Shard object if the master was scrapped
	if wasMaster {
		actionNode := actionnode.UpdateShard()
		lockPath, err := wr.lockShard(ctx, ti.Keyspace, ti.Shard, actionNode)
		if err != nil {
			return err
		}

		// read the shard with the lock
		si, err := wr.ts.GetShard(ctx, ti.Keyspace, ti.Shard)
		if err != nil {
			return wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err)
		}

		// update it if the right alias is there
		if topo.TabletAliasEqual(si.MasterAlias, topo.TabletAliasToProto(tabletAlias)) {
			si.MasterAlias = nil

			// write it back
			if err := topo.UpdateShard(ctx, wr.ts, si); err != nil {
				return wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err)
			}
		} else {
			wr.Logger().Warningf("Scrapping master %v from shard %v/%v but master in Shard object was %v", tabletAlias, ti.Keyspace, ti.Shard, si.MasterAlias)
		}

		// and unlock
		if err := wr.unlockShard(ctx, ti.Keyspace, ti.Shard, actionNode, lockPath, err); err != nil {
			return err
		}
	}

	// and rebuild the original shard
	_, err = wr.RebuildShardGraph(ctx, ti.Keyspace, ti.Shard, []string{ti.Alias.Cell})
	return err
}
// ValidateSchemaKeyspace will diff the schema from all the tablets in
// the keyspace.
func (wr *Wrangler) ValidateSchemaKeyspace(ctx context.Context, keyspace string, excludeTables []string, includeViews bool) error {
	// find all the shards
	shards, err := wr.ts.GetShardNames(ctx, keyspace)
	if err != nil {
		return err
	}

	// corner cases
	if len(shards) == 0 {
		return fmt.Errorf("No shards in keyspace %v", keyspace)
	}
	sort.Strings(shards)
	if len(shards) == 1 {
		return wr.ValidateSchemaShard(ctx, keyspace, shards[0], excludeTables, includeViews)
	}

	// find the reference schema using the first shard's master
	si, err := wr.ts.GetShard(ctx, keyspace, shards[0])
	if err != nil {
		return err
	}
	if topo.TabletAliasIsZero(si.MasterAlias) {
		return fmt.Errorf("No master in shard %v/%v", keyspace, shards[0])
	}
	referenceAlias := si.MasterAlias
	log.Infof("Gathering schema for reference master %v", referenceAlias)
	referenceSchema, err := wr.GetSchema(ctx, referenceAlias, nil, excludeTables, includeViews)
	if err != nil {
		return err
	}

	// then diff with all other tablets everywhere
	er := concurrency.AllErrorRecorder{}
	wg := sync.WaitGroup{}

	// first diff the slaves in the reference shard 0
	aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shards[0])
	if err != nil {
		return err
	}
	for _, alias := range aliases {
		if topo.TabletAliasEqual(alias, si.MasterAlias) {
			continue
		}
		wg.Add(1)
		go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
	}

	// then diff all tablets in the other shards
	for _, shard := range shards[1:] {
		si, err := wr.ts.GetShard(ctx, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}
		if topo.TabletAliasIsZero(si.MasterAlias) {
			er.RecordError(fmt.Errorf("No master in shard %v/%v", keyspace, shard))
			continue
		}
		aliases, err := topo.FindAllTabletAliasesInShard(ctx, wr.ts, keyspace, shard)
		if err != nil {
			er.RecordError(err)
			continue
		}
		for _, alias := range aliases {
			wg.Add(1)
			go wr.diffSchema(ctx, referenceSchema, referenceAlias, alias, excludeTables, includeViews, &wg, &er)
		}
	}
	wg.Wait()
	if er.HasErrors() {
		return fmt.Errorf("Schema diffs:\n%v", er.Error().Error())
	}
	return nil
}
// TestInitMasterShard is the good scenario test, where everything
// works as planned
func TestInitMasterShard(t *testing.T) {
	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)
	vp := NewVtctlPipe(t, ts)
	defer vp.Close()

	// Create a master, a couple good slaves
	master := NewFakeTablet(t, wr, "cell1", 0, pb.TabletType_MASTER)
	goodSlave1 := NewFakeTablet(t, wr, "cell1", 1, pb.TabletType_REPLICA)
	goodSlave2 := NewFakeTablet(t, wr, "cell2", 2, pb.TabletType_REPLICA)

	// Master: set a plausible ReplicationPosition to return,
	// and expect to add entry in _vt.reparent_journal
	master.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   5,
			Server:   456,
			Sequence: 890,
		},
	}
	master.FakeMysqlDaemon.ReadOnly = true
	master.FakeMysqlDaemon.ResetReplicationResult = []string{"reset rep 1"}
	master.FakeMysqlDaemon.StartReplicationCommandsResult = []string{"new master shouldn't use this"}
	master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"reset rep 1",
		"CREATE DATABASE IF NOT EXISTS _vt",
		"SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal",
		"CREATE DATABASE IF NOT EXISTS _vt",
		"SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal",
		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES",
	}
	master.StartActionLoop(t, wr)
	defer master.StopActionLoop(t)

	// Slave1: expect to be reset and re-parented
	goodSlave1.FakeMysqlDaemon.ReadOnly = true
	goodSlave1.FakeMysqlDaemon.ResetReplicationResult = []string{"reset rep 1"}
	goodSlave1.FakeMysqlDaemon.StartReplicationCommandsStatus = &myproto.ReplicationStatus{
		Position:           master.FakeMysqlDaemon.CurrentMasterPosition,
		MasterHost:         master.Tablet.Hostname,
		MasterPort:         int(master.Tablet.PortMap["mysql"]),
		MasterConnectRetry: 10,
	}
	goodSlave1.FakeMysqlDaemon.StartReplicationCommandsResult = []string{"cmd1"}
	goodSlave1.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"reset rep 1",
		"cmd1",
	}
	goodSlave1.StartActionLoop(t, wr)
	defer goodSlave1.StopActionLoop(t)

	// Slave2: expect to be re-parented
	goodSlave2.FakeMysqlDaemon.ReadOnly = true
	goodSlave2.FakeMysqlDaemon.ResetReplicationResult = []string{"reset rep 2"}
	goodSlave2.FakeMysqlDaemon.StartReplicationCommandsStatus = &myproto.ReplicationStatus{
		Position:           master.FakeMysqlDaemon.CurrentMasterPosition,
		MasterHost:         master.Tablet.Hostname,
		MasterPort:         int(master.Tablet.PortMap["mysql"]),
		MasterConnectRetry: 10,
	}
	goodSlave2.FakeMysqlDaemon.StartReplicationCommandsResult = []string{"cmd1", "cmd2"}
	goodSlave2.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"reset rep 2",
		"cmd1",
		"cmd2",
	}
	goodSlave2.StartActionLoop(t, wr)
	defer goodSlave2.StopActionLoop(t)

	// run InitShardMaster
	if err := vp.Run([]string{"InitShardMaster", "-wait_slave_timeout", "10s", master.Tablet.Keyspace + "/" + master.Tablet.Shard, topo.TabletAliasString(master.Tablet.Alias)}); err != nil {
		t.Fatalf("InitShardMaster failed: %v", err)
	}

	// check what was run
	if master.FakeMysqlDaemon.ReadOnly {
		t.Errorf("master was not turned read-write")
	}
	si, err := ts.GetShard(ctx, master.Tablet.Keyspace, master.Tablet.Shard)
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	if !topo.TabletAliasEqual(si.MasterAlias, master.Tablet.Alias) {
		t.Errorf("unexpected shard master alias, got %v expected %v", si.MasterAlias, master.Tablet.Alias)
	}
	if err := master.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("master.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}
	if err := goodSlave1.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("goodSlave1.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}
	if err := goodSlave2.FakeMysqlDaemon.CheckSuperQueryList(); err != nil {
		t.Fatalf("goodSlave2.FakeMysqlDaemon.CheckSuperQueryList failed: %v", err)
	}
}
// InitTablet creates or updates a tablet. If no parent is specified
// in the tablet, and the tablet has a slave type, we will find the
// appropriate parent. If createShardAndKeyspace is true and the
// parent keyspace or shard don't exist, they will be created. If
// update is true, and a tablet with the same ID exists, update it.
// If force is true, and a tablet with the same ID already exists, it
// will be scrapped and deleted, and then recreated.
func (wr *Wrangler) InitTablet(ctx context.Context, tablet *pb.Tablet, force, createShardAndKeyspace, update bool) error {
	if err := topo.TabletComplete(tablet); err != nil {
		return err
	}

	if topo.IsInReplicationGraph(tablet.Type) {
		// get the shard, possibly creating it
		var err error
		var si *topo.ShardInfo
		if createShardAndKeyspace {
			// create the parent keyspace and shard if needed
			si, err = topotools.GetOrCreateShard(ctx, wr.ts, tablet.Keyspace, tablet.Shard)
		} else {
			si, err = wr.ts.GetShard(ctx, tablet.Keyspace, tablet.Shard)
			if err == topo.ErrNoNode {
				return fmt.Errorf("missing parent shard, use -parent option to create it, or CreateKeyspace / CreateShard")
			}
		}

		// get the shard, checks a couple things
		if err != nil {
			return fmt.Errorf("cannot get (or create) shard %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		}
		if !key.KeyRangeEqual(si.KeyRange, tablet.KeyRange) {
			return fmt.Errorf("shard %v/%v has a different KeyRange: %v != %v", tablet.Keyspace, tablet.Shard, si.KeyRange, tablet.KeyRange)
		}
		if tablet.Type == pb.TabletType_MASTER && !topo.TabletAliasIsZero(si.MasterAlias) && !topo.TabletAliasEqual(si.MasterAlias, tablet.Alias) && !force {
			return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topo.TabletAliasString(si.MasterAlias), tablet.Keyspace, tablet.Shard)
		}

		// update the shard record if needed
		if err := wr.updateShardCellsAndMaster(ctx, si, tablet.Alias, tablet.Type, force); err != nil {
			return err
		}
	}

	err := topo.CreateTablet(ctx, wr.ts, tablet)
	if err != nil && err == topo.ErrNodeExists {
		// Try to update nicely, but if it fails fall back to force behavior.
		if update || force {
			oldTablet, err := wr.ts.GetTablet(ctx, tablet.Alias)
			if err != nil {
				wr.Logger().Warningf("failed reading tablet %v: %v", topo.TabletAliasString(tablet.Alias), err)
			} else {
				if oldTablet.Keyspace == tablet.Keyspace && oldTablet.Shard == tablet.Shard {
					*(oldTablet.Tablet) = *tablet
					if err := topo.UpdateTablet(ctx, wr.ts, oldTablet); err != nil {
						wr.Logger().Warningf("failed updating tablet %v: %v", topo.TabletAliasString(tablet.Alias), err)
						// now fall through the Scrap case
					} else {
						if !topo.IsInReplicationGraph(tablet.Type) {
							return nil
						}
						if err := topo.UpdateTabletReplicationData(ctx, wr.ts, tablet); err != nil {
							wr.Logger().Warningf("failed updating tablet replication data for %v: %v", topo.TabletAliasString(tablet.Alias), err)
							// now fall through the Scrap case
						} else {
							return nil
						}
					}
				}
			}
		}
		if force {
			if err = wr.Scrap(ctx, tablet.Alias, force, false); err != nil {
				wr.Logger().Errorf("failed scrapping tablet %v: %v", topo.TabletAliasString(tablet.Alias), err)
				return err
			}
			if err := wr.ts.DeleteTablet(ctx, tablet.Alias); err != nil {
				// we ignore this
				wr.Logger().Errorf("failed deleting tablet %v: %v", topo.TabletAliasString(tablet.Alias), err)
			}
			return topo.CreateTablet(ctx, wr.ts, tablet)
		}
	}
	return err
}
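// A hypothetical usage sketch for InitTablet; the wrangler `wr`, context
// and all record values are assumptions for illustration, not taken
// from this file:
//
//	tablet := &pb.Tablet{
//		Alias:    &pb.TabletAlias{Cell: "cell1", Uid: 42},
//		Hostname: "host1.example.com",
//		PortMap:  map[string]int32{"vt": 15101, "grpc": 16101},
//		Keyspace: "test_keyspace",
//		Shard:    "0",
//		Type:     pb.TabletType_REPLICA,
//	}
//	// createShardAndKeyspace=true creates missing parents; update and
//	// force control what happens when the alias already exists.
//	if err := wr.InitTablet(ctx, tablet, false /*force*/, true /*createShardAndKeyspace*/, false /*update*/); err != nil {
//		// handle error
//	}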
// TestInitMasterShardOneSlaveFails makes sure that if one slave fails to
// proceed, the action completes anyway
func TestInitMasterShardOneSlaveFails(t *testing.T) {
	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)

	// Create a master, a couple slaves
	master := NewFakeTablet(t, wr, "cell1", 0, pb.TabletType_MASTER)
	goodSlave := NewFakeTablet(t, wr, "cell1", 1, pb.TabletType_REPLICA)
	badSlave := NewFakeTablet(t, wr, "cell2", 2, pb.TabletType_REPLICA)

	// Master: set a plausible ReplicationPosition to return,
	// and expect to add entry in _vt.reparent_journal
	master.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   5,
			Server:   456,
			Sequence: 890,
		},
	}
	master.FakeMysqlDaemon.ReadOnly = true
	master.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = []string{
		"CREATE DATABASE IF NOT EXISTS _vt",
		"SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal",
		"CREATE DATABASE IF NOT EXISTS _vt",
		"SUBCREATE TABLE IF NOT EXISTS _vt.reparent_journal",
		"SUBINSERT INTO _vt.reparent_journal (time_created_ns, action_name, master_alias, replication_position) VALUES",
	}
	master.StartActionLoop(t, wr)
	defer master.StopActionLoop(t)

	// goodSlave: expect to be re-parented
	goodSlave.FakeMysqlDaemon.ReadOnly = true
	goodSlave.FakeMysqlDaemon.StartReplicationCommandsStatus = &myproto.ReplicationStatus{
		Position:           master.FakeMysqlDaemon.CurrentMasterPosition,
		MasterHost:         master.Tablet.Hostname,
		MasterPort:         int(master.Tablet.PortMap["mysql"]),
		MasterConnectRetry: 10,
	}
	goodSlave.FakeMysqlDaemon.StartReplicationCommandsResult = []string{"cmd1"}
	goodSlave.FakeMysqlDaemon.ExpectedExecuteSuperQueryList = goodSlave.FakeMysqlDaemon.StartReplicationCommandsResult
	goodSlave.StartActionLoop(t, wr)
	defer goodSlave.StopActionLoop(t)

	// badSlave: insert an error by failing the ReplicationStatus input
	// on purpose
	badSlave.FakeMysqlDaemon.ReadOnly = true
	badSlave.FakeMysqlDaemon.StartReplicationCommandsStatus = &myproto.ReplicationStatus{
		Position:           master.FakeMysqlDaemon.CurrentMasterPosition,
		MasterHost:         "",
		MasterPort:         0,
		MasterConnectRetry: 10,
	}
	badSlave.StartActionLoop(t, wr)
	defer badSlave.StopActionLoop(t)

	// also change the master alias in the Shard object, to make sure it
	// is set back.
	si, err := ts.GetShard(ctx, master.Tablet.Keyspace, master.Tablet.Shard)
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.MasterAlias.Uid++
	if err := topo.UpdateShard(ctx, ts, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// run InitShardMaster without force, it fails because master is
	// changing.
	if err := wr.InitShardMaster(ctx, master.Tablet.Keyspace, master.Tablet.Shard, master.Tablet.Alias, false /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "is not the shard master") {
		t.Errorf("InitShardMaster with mismatched new master returned wrong error: %v", err)
	}

	// run InitShardMaster with force
	if err := wr.InitShardMaster(ctx, master.Tablet.Keyspace, master.Tablet.Shard, master.Tablet.Alias, true /*force*/, 10*time.Second); err == nil || !strings.Contains(err.Error(), "wrong status for StartReplicationCommands") {
		t.Errorf("InitShardMaster with one failed slave returned wrong error: %v", err)
	}

	// check what was run: master should still be good
	if master.FakeMysqlDaemon.ReadOnly {
		t.Errorf("master was not turned read-write")
	}
	si, err = ts.GetShard(ctx, master.Tablet.Keyspace, master.Tablet.Shard)
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	if !topo.TabletAliasEqual(si.MasterAlias, master.Tablet.Alias) {
		t.Errorf("unexpected shard master alias, got %v expected %v", si.MasterAlias, master.Tablet.Alias)
	}
}
func TestTabletExternallyReparented(t *testing.T) {
	tabletmanager.SetReparentFlags(time.Minute /* finalizeTimeout */)

	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)
	vp := NewVtctlPipe(t, ts)
	defer vp.Close()

	// Create an old master, a new master, two good slaves, one bad slave
	oldMaster := NewFakeTablet(t, wr, "cell1", 0, pb.TabletType_MASTER)
	newMaster := NewFakeTablet(t, wr, "cell1", 1, pb.TabletType_REPLICA)
	goodSlave1 := NewFakeTablet(t, wr, "cell1", 2, pb.TabletType_REPLICA)
	goodSlave2 := NewFakeTablet(t, wr, "cell2", 3, pb.TabletType_REPLICA)
	badSlave := NewFakeTablet(t, wr, "cell1", 4, pb.TabletType_REPLICA)

	// Add a new Cell to the Shard, that doesn't map to any read topo cell,
	// to simulate a data center being unreachable.
	si, err := ts.GetShard(ctx, "test_keyspace", "0")
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.Cells = append(si.Cells, "cell666")
	if err := topo.UpdateShard(ctx, ts, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// Slightly unrelated test: make sure we can find the tablets
	// even with a datacenter being down.
	tabletMap, err := topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell1"})
	if err != nil {
		t.Fatalf("GetTabletMapForShardByCell should have worked but got: %v", err)
	}
	master, err := topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.Ip, "vt", oldMaster.Tablet.PortMap["vt"])
	if err != nil || !topo.TabletAliasEqual(&master, oldMaster.Tablet.Alias) {
		t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
	}
	slave1, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave1.Tablet.Ip, "vt", goodSlave1.Tablet.PortMap["vt"])
	if err != nil || !topo.TabletAliasEqual(&slave1, goodSlave1.Tablet.Alias) {
		t.Fatalf("FindTabletByIPAddrAndPort(slave1) failed: %v %v", err, slave1)
	}
	slave2, err := topotools.FindTabletByIPAddrAndPort(tabletMap, goodSlave2.Tablet.Ip, "vt", goodSlave2.Tablet.PortMap["vt"])
	if err != topo.ErrNoNode {
		t.Fatalf("FindTabletByIPAddrAndPort(slave2) worked: %v %v", err, slave2)
	}

	// Make sure the master is not exported in other cells
	tabletMap, err = topo.GetTabletMapForShardByCell(ctx, ts, "test_keyspace", "0", []string{"cell2"})
	master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.Ip, "vt", oldMaster.Tablet.PortMap["vt"])
	if err != topo.ErrNoNode {
		t.Fatalf("FindTabletByIPAddrAndPort(master) worked in cell2: %v %v", err, master)
	}

	tabletMap, err = topo.GetTabletMapForShard(ctx, ts, "test_keyspace", "0")
	if err != topo.ErrPartialResult {
		t.Fatalf("GetTabletMapForShard should have returned ErrPartialResult but got: %v", err)
	}
	master, err = topotools.FindTabletByIPAddrAndPort(tabletMap, oldMaster.Tablet.Ip, "vt", oldMaster.Tablet.PortMap["vt"])
	if err != nil || !topo.TabletAliasEqual(&master, oldMaster.Tablet.Alias) {
		t.Fatalf("FindTabletByIPAddrAndPort(master) failed: %v %v", err, master)
	}

	// On the elected master, we will respond to
	// TabletActionSlaveWasPromoted
	newMaster.StartActionLoop(t, wr)
	defer newMaster.StopActionLoop(t)

	// On the old master, we will only respond to
	// TabletActionSlaveWasRestarted.
	oldMaster.StartActionLoop(t, wr)
	defer oldMaster.StopActionLoop(t)

	// On the good slaves, we will respond to
	// TabletActionSlaveWasRestarted.
	goodSlave1.StartActionLoop(t, wr)
	defer goodSlave1.StopActionLoop(t)
	goodSlave2.StartActionLoop(t, wr)
	defer goodSlave2.StopActionLoop(t)

	// On the bad slave, we will respond to
	// TabletActionSlaveWasRestarted with bad data.
	badSlave.StartActionLoop(t, wr)
	defer badSlave.StopActionLoop(t)

	// First test: reparent to the same master, make sure it works
	// as expected.
	tmc := tmclient.NewTabletManagerClient()
	ti, err := ts.GetTablet(ctx, oldMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := vp.Run([]string{"TabletExternallyReparented", topo.TabletAliasString(oldMaster.Tablet.Alias)}); err != nil {
		t.Fatalf("TabletExternallyReparented(same master) should have worked: %v", err)
	}

	// Second test: reparent to a replica, and pretend the old
	// master is still good to go.

	// This tests a bad case; the new designated master is a slave,
	// but we should do what we're told anyway
	ti, err = ts.GetTablet(ctx, goodSlave1.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	if err := tmc.TabletExternallyReparented(context.Background(), ti, ""); err != nil {
		t.Fatalf("TabletExternallyReparented(slave) error: %v", err)
	}

	// This tests the good case, where everything works as planned
	t.Logf("TabletExternallyReparented(new master) expecting success")
	ti, err = ts.GetTablet(ctx, newMaster.Tablet.Alias)
	if err != nil {
		t.Fatalf("GetTablet failed: %v", err)
	}
	waitID := makeWaitID()
	if err := tmc.TabletExternallyReparented(context.Background(), ti, waitID); err != nil {
		t.Fatalf("TabletExternallyReparented(replica) failed: %v", err)
	}
	waitForExternalReparent(t, waitID)

	// Now double-check the serving graph is good.
	// Should only have one good replica left.
	addrs, _, err := ts.GetEndPoints(ctx, "cell1", "test_keyspace", "0", pb.TabletType_REPLICA)
	if err != nil {
		t.Fatalf("GetEndPoints failed at the end: %v", err)
	}
	if len(addrs.Entries) != 1 {
		t.Fatalf("GetEndPoints has too many entries: %v", addrs)
	}
}
func TestShardReplicationStatuses(t *testing.T) {
	ctx := context.Background()
	ts := zktopo.NewTestServer(t, []string{"cell1", "cell2"})
	wr := wrangler.New(logutil.NewConsoleLogger(), ts, tmclient.NewTabletManagerClient(), time.Second)

	// create shard and tablets
	if err := topo.CreateShard(ctx, ts, "test_keyspace", "0"); err != nil {
		t.Fatalf("CreateShard failed: %v", err)
	}
	master := NewFakeTablet(t, wr, "cell1", 1, pb.TabletType_MASTER)
	slave := NewFakeTablet(t, wr, "cell1", 2, pb.TabletType_REPLICA)

	// mark the master inside the shard
	si, err := ts.GetShard(ctx, "test_keyspace", "0")
	if err != nil {
		t.Fatalf("GetShard failed: %v", err)
	}
	si.MasterAlias = master.Tablet.Alias
	if err := topo.UpdateShard(ctx, ts, si); err != nil {
		t.Fatalf("UpdateShard failed: %v", err)
	}

	// master action loop (to initialize host and port)
	master.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   5,
			Server:   456,
			Sequence: 892,
		},
	}
	master.StartActionLoop(t, wr)
	defer master.StopActionLoop(t)

	// slave loop
	slave.FakeMysqlDaemon.CurrentMasterPosition = myproto.ReplicationPosition{
		GTIDSet: myproto.MariadbGTID{
			Domain:   5,
			Server:   456,
			Sequence: 890,
		},
	}
	slave.FakeMysqlDaemon.CurrentMasterHost = master.Tablet.Hostname
	slave.FakeMysqlDaemon.CurrentMasterPort = int(master.Tablet.PortMap["mysql"])
	slave.StartActionLoop(t, wr)
	defer slave.StopActionLoop(t)

	// run ShardReplicationStatuses
	ti, rs, err := wr.ShardReplicationStatuses(ctx, "test_keyspace", "0")
	if err != nil {
		t.Fatalf("ShardReplicationStatuses failed: %v", err)
	}

	// check result (make master first in the array)
	if len(ti) != 2 || len(rs) != 2 {
		t.Fatalf("ShardReplicationStatuses returned wrong results: %v %v", ti, rs)
	}
	if topo.TabletAliasEqual(ti[0].Alias, slave.Tablet.Alias) {
		ti[0], ti[1] = ti[1], ti[0]
		rs[0], rs[1] = rs[1], rs[0]
	}
	if !topo.TabletAliasEqual(ti[0].Alias, master.Tablet.Alias) ||
		!topo.TabletAliasEqual(ti[1].Alias, slave.Tablet.Alias) ||
		rs[0].MasterHost != "" ||
		rs[1].MasterHost != master.Tablet.Hostname {
		t.Fatalf("ShardReplicationStatuses returned wrong results: %v %v", ti, rs)
	}
}
func (wr *Wrangler) emergencyReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	event.DispatchUpdate(ev, "reading all tablets")
	tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// Check corner cases we're going to depend on
	masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("master-elect tablet %v is not in the shard", masterElectTabletAlias)
	}
	ev.NewMaster = *masterElectTabletInfo.Tablet
	if topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		return fmt.Errorf("master-elect tablet %v is already the master", masterElectTabletAlias)
	}

	// Deal with the old master: try to remote-scrap it, if it's
	// truly dead we force-scrap it. Remove it from our map in any case.
	if !topo.TabletAliasIsZero(shardInfo.MasterAlias) {
		scrapOldMaster := true
		oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
		if ok {
			delete(tabletMap, *shardInfo.MasterAlias)
		} else {
			oldMasterTabletInfo, err = wr.ts.GetTablet(ctx, shardInfo.MasterAlias)
			if err != nil {
				wr.logger.Warningf("cannot read old master tablet %v, won't touch it: %v", shardInfo.MasterAlias, err)
				scrapOldMaster = false
			}
		}

		if scrapOldMaster {
			ev.OldMaster = *oldMasterTabletInfo.Tablet
			wr.logger.Infof("scrapping old master %v", shardInfo.MasterAlias)

			ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
			defer cancel()

			if err := wr.tmc.Scrap(ctx, oldMasterTabletInfo); err != nil {
				wr.logger.Warningf("remote scrapping of old master failed, will force the scrap: %v", err)

				if err := topotools.Scrap(ctx, wr.ts, shardInfo.MasterAlias, true); err != nil {
					wr.logger.Warningf("old master topo scrapping failed, continuing anyway: %v", err)
				}
			}
		}
	}

	// Stop replication on all slaves, get their current
	// replication position
	event.DispatchUpdate(ev, "stop replication on all slaves")
	wg := sync.WaitGroup{}
	mu := sync.Mutex{}
	statusMap := make(map[pb.TabletAlias]myproto.ReplicationStatus)
	for alias, tabletInfo := range tabletMap {
		wg.Add(1)
		go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
			defer wg.Done()
			wr.logger.Infof("getting replication position from %v", alias)
			ctx, cancel := context.WithTimeout(ctx, waitSlaveTimeout)
			defer cancel()
			rp, err := wr.TabletManagerClient().StopReplicationAndGetStatus(ctx, tabletInfo)
			if err != nil {
				wr.logger.Warningf("failed to get replication status from %v, ignoring tablet: %v", alias, err)
				return
			}
			mu.Lock()
			statusMap[alias] = rp
			mu.Unlock()
		}(alias, tabletInfo)
	}
	wg.Wait()

	// Verify masterElect is alive and has the most advanced position
	masterElectStatus, ok := statusMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("couldn't get master elect %v replication position", masterElectTabletAlias)
	}
	for alias, status := range statusMap {
		if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
			continue
		}
		if !masterElectStatus.Position.AtLeast(status.Position) {
			return fmt.Errorf("tablet %v is more advanced than master elect tablet %v: %v > %v", alias, masterElectTabletAlias, status.Position, masterElectStatus.Position)
		}
	}

	// Promote the masterElect
	wr.logger.Infof("promote slave %v", masterElectTabletAlias)
	event.DispatchUpdate(ev, "promoting slave")
	rp, err := wr.tmc.PromoteSlave(ctx, masterElectTabletInfo)
	if err != nil {
		return fmt.Errorf("master-elect tablet %v failed to be upgraded to master: %v", masterElectTabletAlias, err)
	}

	// Reset replication on all slaves to point to the new master, and
	// insert test row in the new master.
	// Go through all the tablets:
	// - new master: populate the reparent journal
	// - everybody else: reparent to new master, wait for row
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	wgMaster := sync.WaitGroup{}
	wgSlaves := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	var masterErr error
	for alias, tabletInfo := range tabletMap {
		if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
			wgMaster.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgMaster.Done()
				wr.logger.Infof("populating reparent journal on new master %v", alias)
				masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, emergencyReparentShardOperation, &alias, rp)
			}(alias, tabletInfo)
		} else {
			wgSlaves.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgSlaves.Done()
				wr.logger.Infof("setting new master on slave %v", alias)
				forceStartSlave := false
				if status, ok := statusMap[alias]; ok {
					forceStartSlave = status.SlaveIORunning || status.SlaveSQLRunning
				}
				if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil {
					rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", alias, err))
				}
			}(alias, tabletInfo)
		}
	}

	// After the master is done, we can update the shard record
	// (note with semi-sync, it also means at least one slave is done)
	wgMaster.Wait()
	if masterErr != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr)
	}
	wr.logger.Infof("updating shard record with new master %v", masterElectTabletAlias)
	shardInfo.MasterAlias = masterElectTabletAlias
	if err := topo.UpdateShard(ctx, wr.ts, shardInfo); err != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to update shard master record: %v", err)
	}

	// Wait for the slaves to complete. If some of them fail, we
	// will rebuild the shard serving graph anyway
	wgSlaves.Wait()
	if err := rec.Error(); err != nil {
		wr.Logger().Errorf("Some slaves failed to reparent: %v", err)
		return err
	}

	// Then we rebuild the entire serving graph for the shard,
	// to account for all changes.
	wr.logger.Infof("rebuilding shard graph")
	event.DispatchUpdate(ev, "rebuilding shard serving graph")
	_, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil)
	return err
}
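// exampleMostAdvanced is a hypothetical, minimal restatement (not called
// anywhere) of the safety check emergencyReparentShardLocked performs
// above: the master-elect must have applied at least everything every
// other reachable tablet has, otherwise promoting it would lose
// transactions.
func exampleMostAdvanced(statusMap map[pb.TabletAlias]myproto.ReplicationStatus, masterElect *pb.TabletAlias) error {
	masterElectStatus, ok := statusMap[*masterElect]
	if !ok {
		return fmt.Errorf("no replication status for master elect %v", masterElect)
	}
	for alias, status := range statusMap {
		if topo.TabletAliasEqual(&alias, masterElect) {
			continue
		}
		// AtLeast is a GTID-set comparison, not a simple ordering:
		// it checks the master-elect's position contains the other
		// tablet's position.
		if !masterElectStatus.Position.AtLeast(status.Position) {
			return fmt.Errorf("tablet %v is more advanced than master elect %v", alias, masterElect)
		}
	}
	return nil
}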
func (wr *Wrangler) plannedReparentShardLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, waitSlaveTimeout time.Duration) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	event.DispatchUpdate(ev, "reading tablet map")
	tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// Check corner cases we're going to depend on
	masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("master-elect tablet %v is not in the shard", masterElectTabletAlias)
	}
	ev.NewMaster = *masterElectTabletInfo.Tablet
	if topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		return fmt.Errorf("master-elect tablet %v is already the master", masterElectTabletAlias)
	}
	oldMasterTabletInfo, ok := tabletMap[*shardInfo.MasterAlias]
	if !ok {
		return fmt.Errorf("old master tablet %v is not in the shard", shardInfo.MasterAlias)
	}
	ev.OldMaster = *oldMasterTabletInfo.Tablet

	// Demote the current master, get its replication position
	wr.logger.Infof("demote current master %v", shardInfo.MasterAlias)
	event.DispatchUpdate(ev, "demoting old master")
	rp, err := wr.tmc.DemoteMaster(ctx, oldMasterTabletInfo)
	if err != nil {
		return fmt.Errorf("old master tablet %v DemoteMaster failed: %v", shardInfo.MasterAlias, err)
	}

	// Wait on the master-elect tablet until it reaches that position,
	// then promote it
	wr.logger.Infof("promote slave %v", masterElectTabletAlias)
	event.DispatchUpdate(ev, "promoting slave")
	rp, err = wr.tmc.PromoteSlaveWhenCaughtUp(ctx, masterElectTabletInfo, rp)
	if err != nil {
		return fmt.Errorf("master-elect tablet %v failed to catch up with replication or be upgraded to master: %v", masterElectTabletAlias, err)
	}

	// Go through all the tablets:
	// - new master: populate the reparent journal
	// - everybody else: reparent to new master, wait for row
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	wgMaster := sync.WaitGroup{}
	wgSlaves := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	var masterErr error
	for alias, tabletInfo := range tabletMap {
		if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
			wgMaster.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgMaster.Done()
				wr.logger.Infof("populating reparent journal on new master %v", alias)
				masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, plannedReparentShardOperation, &alias, rp)
			}(alias, tabletInfo)
		} else {
			wgSlaves.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgSlaves.Done()
				wr.logger.Infof("setting new master on slave %v", alias)
				// also restart replication on old master
				forceStartSlave := topo.TabletAliasEqual(&alias, oldMasterTabletInfo.Alias)
				if err := wr.TabletManagerClient().SetMaster(ctx, tabletInfo, masterElectTabletAlias, now, forceStartSlave); err != nil {
					rec.RecordError(fmt.Errorf("Tablet %v SetMaster failed: %v", alias, err))
					return
				}
			}(alias, tabletInfo)
		}
	}

	// After the master is done, we can update the shard record
	// (note with semi-sync, it also means at least one slave is done)
	wgMaster.Wait()
	if masterErr != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr)
	}
	wr.logger.Infof("updating shard record with new master %v", masterElectTabletAlias)
	shardInfo.MasterAlias = masterElectTabletAlias
	if err := topo.UpdateShard(ctx, wr.ts, shardInfo); err != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to update shard master record: %v", err)
	}

	// Wait for the slaves to complete. If some of them fail, we
	// will rebuild the shard serving graph anyway
	wgSlaves.Wait()
	if err := rec.Error(); err != nil {
		wr.Logger().Errorf("Some slaves failed to reparent: %v", err)
		return err
	}

	// Then we rebuild the entire serving graph for the shard,
	// to account for all changes.
	wr.logger.Infof("rebuilding shard graph")
	event.DispatchUpdate(ev, "rebuilding shard serving graph")
	_, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil)
	return err
}
func (wr *Wrangler) initShardMasterLocked(ctx context.Context, ev *events.Reparent, keyspace, shard string, masterElectTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) error {
	shardInfo, err := wr.ts.GetShard(ctx, keyspace, shard)
	if err != nil {
		return err
	}
	ev.ShardInfo = *shardInfo

	event.DispatchUpdate(ev, "reading tablet map")
	tabletMap, err := topo.GetTabletMapForShard(ctx, wr.ts, keyspace, shard)
	if err != nil {
		return err
	}

	// Check the master elect is in tabletMap
	masterElectTabletInfo, ok := tabletMap[*masterElectTabletAlias]
	if !ok {
		return fmt.Errorf("master-elect tablet %v is not in the shard", masterElectTabletAlias)
	}
	ev.NewMaster = *masterElectTabletInfo.Tablet

	// Check the master elect is the only master in the shard, or -force was used.
	_, masterTabletMap := topotools.SortedTabletMap(tabletMap)
	if !topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		if !force {
			return fmt.Errorf("master-elect tablet %v is not the shard master, use -force to proceed anyway", masterElectTabletAlias)
		}
		wr.logger.Warningf("master-elect tablet %v is not the shard master, proceeding anyway as -force was used", masterElectTabletAlias)
	}
	if _, ok := masterTabletMap[*masterElectTabletAlias]; !ok {
		if !force {
			return fmt.Errorf("master-elect tablet %v is not a master in the shard, use -force to proceed anyway", masterElectTabletAlias)
		}
		wr.logger.Warningf("master-elect tablet %v is not a master in the shard, proceeding anyway as -force was used", masterElectTabletAlias)
	}
	haveOtherMaster := false
	for alias, ti := range masterTabletMap {
		if !topo.TabletAliasEqual(&alias, masterElectTabletAlias) && ti.Type != pb.TabletType_SCRAP {
			haveOtherMaster = true
		}
	}
	if haveOtherMaster {
		if !force {
			return fmt.Errorf("master-elect tablet %v is not the only master in the shard, use -force to proceed anyway", masterElectTabletAlias)
		}
		wr.logger.Warningf("master-elect tablet %v is not the only master in the shard, proceeding anyway as -force was used", masterElectTabletAlias)
	}

	// First phase: reset replication on all tablets. If anyone fails,
	// we stop. It is probably because it is unreachable, and may leave
	// an unstable database process in the mix, with a database daemon
	// at a wrong replication spot.
	event.DispatchUpdate(ev, "resetting replication on all tablets")
	wg := sync.WaitGroup{}
	rec := concurrency.AllErrorRecorder{}
	for alias, tabletInfo := range tabletMap {
		wg.Add(1)
		go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
			defer wg.Done()
			wr.logger.Infof("resetting replication on tablet %v", alias)
			if err := wr.TabletManagerClient().ResetReplication(ctx, tabletInfo); err != nil {
				rec.RecordError(fmt.Errorf("Tablet %v ResetReplication failed (either fix it, or Scrap it): %v", alias, err))
			}
		}(alias, tabletInfo)
	}
	wg.Wait()
	if err := rec.Error(); err != nil {
		return err
	}

	// Tell the new master to break its slaves, return its replication
	// position
	wr.logger.Infof("initializing master on %v", masterElectTabletAlias)
	event.DispatchUpdate(ev, "initializing master")
	rp, err := wr.TabletManagerClient().InitMaster(ctx, masterElectTabletInfo)
	if err != nil {
		return err
	}

	// Now tell the new master to insert the reparent_journal row,
	// and tell everybody else to become a slave of the new master,
	// and wait for the row in the reparent_journal table.
	// We start all these in parallel, to handle the semi-sync
	// case: for the master to be able to commit its row in the
	// reparent_journal table, it needs connected slaves.
	event.DispatchUpdate(ev, "reparenting all tablets")
	now := time.Now().UnixNano()
	wgMaster := sync.WaitGroup{}
	wgSlaves := sync.WaitGroup{}
	var masterErr error
	for alias, tabletInfo := range tabletMap {
		if topo.TabletAliasEqual(&alias, masterElectTabletAlias) {
			wgMaster.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgMaster.Done()
				wr.logger.Infof("populating reparent journal on new master %v", alias)
				masterErr = wr.TabletManagerClient().PopulateReparentJournal(ctx, tabletInfo, now, initShardMasterOperation, &alias, rp)
			}(alias, tabletInfo)
		} else {
			wgSlaves.Add(1)
			go func(alias pb.TabletAlias, tabletInfo *topo.TabletInfo) {
				defer wgSlaves.Done()
				wr.logger.Infof("initializing slave %v", alias)
				if err := wr.TabletManagerClient().InitSlave(ctx, tabletInfo, masterElectTabletAlias, rp, now); err != nil {
					rec.RecordError(fmt.Errorf("Tablet %v InitSlave failed: %v", alias, err))
				}
			}(alias, tabletInfo)
		}
	}

	// After the master is done, we can update the shard record
	// (note with semi-sync, it also means at least one slave is done)
	wgMaster.Wait()
	if masterErr != nil {
		wgSlaves.Wait()
		return fmt.Errorf("failed to PopulateReparentJournal on master: %v", masterErr)
	}
	if !topo.TabletAliasEqual(shardInfo.MasterAlias, masterElectTabletAlias) {
		shardInfo.MasterAlias = masterElectTabletAlias
		if err := topo.UpdateShard(ctx, wr.ts, shardInfo); err != nil {
			wgSlaves.Wait()
			return fmt.Errorf("failed to update shard master record: %v", err)
		}
	}

	// Wait for the slaves to complete. If some of them fail, we
	// don't want to rebuild the shard serving graph (the failure
	// will most likely be a timeout, and our context will be
	// expired, so the rebuild will fail anyway)
	wgSlaves.Wait()
	if err := rec.Error(); err != nil {
		return err
	}

	// Then we rebuild the entire serving graph for the shard,
	// to account for all changes.
	event.DispatchUpdate(ev, "rebuilding shard graph")
	_, err = wr.RebuildShardGraph(ctx, keyspace, shard, nil)
	return err
}
// TabletExternallyReparented updates all topo records so the current
// tablet is the new master for this shard.
// Should be called under RPCWrapLock.
func (agent *ActionAgent) TabletExternallyReparented(ctx context.Context, externalID string) error {
	startTime := time.Now()

	// If there is a finalize step running, wait for it to finish or time out
	// before checking the global shard record again.
	if agent.finalizeReparentCtx != nil {
		select {
		case <-agent.finalizeReparentCtx.Done():
			agent.finalizeReparentCtx = nil
		case <-ctx.Done():
			return ctx.Err()
		}
	}

	tablet := agent.Tablet()

	// Check the global shard record.
	si, err := topo.GetShard(ctx, agent.TopoServer, tablet.Keyspace, tablet.Shard)
	if err != nil {
		log.Warningf("fastTabletExternallyReparented: failed to read global shard record for %v/%v: %v", tablet.Keyspace, tablet.Shard, err)
		return err
	}
	if topo.TabletAliasEqual(si.MasterAlias, tablet.Alias) {
		// We may get called on the current master even when nothing has changed.
		// If the global shard record is already updated, it means we successfully
		// finished a previous reparent to this tablet.
		return nil
	}

	// Create a reusable Reparent event with available info.
	ev := &events.Reparent{
		ShardInfo: *si,
		NewMaster: *tablet.Tablet,
		OldMaster: pb.Tablet{
			Alias: si.MasterAlias,
			Type:  pb.TabletType_MASTER,
		},
		ExternalID: externalID,
	}
	defer func() {
		if err != nil {
			event.DispatchUpdate(ev, "failed: "+err.Error())
		}
	}()
	event.DispatchUpdate(ev, "starting external from tablet (fast)")

	// Execute state change to master by force-updating only the local copy of the
	// tablet record. The actual record in topo will be updated later.
	log.Infof("fastTabletExternallyReparented: executing change callback for state change to MASTER")
	oldTablet := *tablet.Tablet
	newTablet := oldTablet
	newTablet.Type = pb.TabletType_MASTER
	newTablet.HealthMap = nil
	agent.setTablet(topo.NewTabletInfo(&newTablet, -1))
	if err := agent.updateState(ctx, &oldTablet, "fastTabletExternallyReparented"); err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to change tablet state to MASTER: %v", err)
	}

	agent.mutex.Lock()
	agent._tabletExternallyReparentedTime = time.Now()
	agent.mutex.Unlock()

	// Directly write the new master endpoint in the serving graph.
	// We will do a true rebuild in the background soon, but in the meantime,
	// this will be enough for clients to re-resolve the new master.
	event.DispatchUpdate(ev, "writing new master endpoint")
	log.Infof("fastTabletExternallyReparented: writing new master endpoint to serving graph")
	ep, err := topo.TabletEndPoint(tablet.Tablet)
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to generate EndPoint for tablet %v: %v", tablet.Alias, err)
	}
	err = topo.UpdateEndPoints(ctx, agent.TopoServer, tablet.Alias.Cell, si.Keyspace(), si.ShardName(), pb.TabletType_MASTER, &pb.EndPoints{Entries: []*pb.EndPoint{ep}}, -1)
	if err != nil {
		return fmt.Errorf("fastTabletExternallyReparented: failed to update master endpoint: %v", err)
	}
	externalReparentStats.Record("NewMasterVisible", startTime)

	// Start the finalize stage with a background context, but connect the trace.
	bgCtx, cancel := context.WithTimeout(agent.batchCtx, *finalizeReparentTimeout)
	bgCtx = trace.CopySpan(bgCtx, ctx)
	agent.finalizeReparentCtx = bgCtx
	go func() {
		err := agent.finalizeTabletExternallyReparented(bgCtx, si, ev)
		cancel()
		if err != nil {
			log.Warningf("finalizeTabletExternallyReparented error: %v", err)
			event.DispatchUpdate(ev, "failed: "+err.Error())
			return
		}
		externalReparentStats.Record("FullRebuild", startTime)
	}()

	return nil
}
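// A hypothetical usage sketch: an external failover tool notifies the
// newly promoted tablet through the TabletManagerClient, which triggers
// the fast path above (see TestTabletExternallyReparented earlier).
// `ts`, `tmc`, `ctx`, `newMasterAlias` and `externalID` are assumptions:
//
//	ti, err := ts.GetTablet(ctx, newMasterAlias)
//	if err != nil {
//		// handle error
//	}
//	if err := tmc.TabletExternallyReparented(ctx, ti, externalID); err != nil {
//		// handle error
//	}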
// InitTablet initializes the tablet record if necessary.
func (agent *ActionAgent) InitTablet(port, gRPCPort int32) error {
	// only enabled if one of init_tablet_type (when healthcheck
	// is disabled) or init_keyspace (when healthcheck is enabled)
	// is passed in, then check other parameters
	if *initTabletType == "" && *initKeyspace == "" {
		return nil
	}

	// figure out our default target type
	var tabletType pb.TabletType
	if *initTabletType != "" {
		if *targetTabletType != "" {
			log.Fatalf("cannot specify both target_tablet_type and init_tablet_type parameters (as they might conflict)")
		}

		// use the type specified on the command line
		var err error
		tabletType, err = topo.ParseTabletType(*initTabletType)
		if err != nil {
			log.Fatalf("Invalid init tablet type %v: %v", *initTabletType, err)
		}

		if tabletType == pb.TabletType_MASTER || tabletType == pb.TabletType_SCRAP {
			// We disallow TYPE_MASTER, so we don't have to change
			// shard.MasterAlias, and deal with the corner cases.
			// We also disallow TYPE_SCRAP, obviously.
			log.Fatalf("init_tablet_type cannot be %v", tabletType)
		}
	} else if *targetTabletType != "" {
		if strings.ToUpper(*targetTabletType) == pb.TabletType_name[int32(pb.TabletType_MASTER)] {
			log.Fatalf("target_tablet_type cannot be '%v'. Use '%v' instead.", *targetTabletType, pb.TabletType_REPLICA)
		}

		// use spare, the healthcheck will turn us into what
		// we need to be eventually
		tabletType = pb.TabletType_SPARE
	} else {
		log.Fatalf("if init tablet is enabled, one of init_tablet_type or target_tablet_type needs to be specified")
	}

	// create a context for this whole operation
	ctx, cancel := context.WithTimeout(agent.batchCtx, *initTimeout)
	defer cancel()

	// if we're assigned to a shard, make sure it exists, see if
	// we are its master, and update its cells list if necessary
	if tabletType != pb.TabletType_IDLE {
		if *initKeyspace == "" || *initShard == "" {
			log.Fatalf("if init tablet is enabled and the target type is not idle, init_keyspace and init_shard also need to be specified")
		}
		shard, _, err := topo.ValidateShardName(*initShard)
		if err != nil {
			log.Fatalf("cannot validate shard name: %v", err)
		}

		log.Infof("Reading shard record %v/%v", *initKeyspace, shard)

		// read the shard, create it if necessary
		si, err := topotools.GetOrCreateShard(ctx, agent.TopoServer, *initKeyspace, shard)
		if err != nil {
			return fmt.Errorf("InitTablet cannot GetOrCreateShard shard: %v", err)
		}
		if si.MasterAlias != nil && topo.TabletAliasEqual(si.MasterAlias, agent.TabletAlias) {
			// we are the current master for this shard (probably
			// means the master tablet process was just restarted),
			// so InitTablet as master.
			tabletType = pb.TabletType_MASTER
		}

		// See if we need to add the tablet's cell to the shard's cell
		// list. If we do, it has to be under the shard lock.
		if !si.HasCell(agent.TabletAlias.Cell) {
			actionNode := actionnode.UpdateShard()
			lockPath, err := actionNode.LockShard(ctx, agent.TopoServer, *initKeyspace, shard)
			if err != nil {
				return fmt.Errorf("LockShard(%v/%v) failed: %v", *initKeyspace, shard, err)
			}

			// re-read the shard with the lock
			si, err = agent.TopoServer.GetShard(ctx, *initKeyspace, shard)
			if err != nil {
				return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
			}

			// see if we really need to update it now
			if !si.HasCell(agent.TabletAlias.Cell) {
				si.Cells = append(si.Cells, agent.TabletAlias.Cell)

				// write it back
				if err := topo.UpdateShard(ctx, agent.TopoServer, si); err != nil {
					return actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, err)
				}
			}

			// and unlock
			if err := actionNode.UnlockShard(ctx, agent.TopoServer, *initKeyspace, shard, lockPath, nil); err != nil {
				return err
			}
		}
	}
	log.Infof("Initializing the tablet for type %v", tabletType)

	// figure out the hostname
	hostname := *tabletHostname
	if hostname == "" {
		var err error
		hostname, err = netutil.FullyQualifiedHostname()
		if err != nil {
			return err
		}
	}

	// create and populate tablet record
	tablet := &pb.Tablet{
		Alias:          agent.TabletAlias,
		Hostname:       hostname,
		PortMap:        make(map[string]int32),
		Keyspace:       *initKeyspace,
		Shard:          *initShard,
		Type:           tabletType,
		DbNameOverride: *initDbNameOverride,
		Tags:           initTags,
	}
	if port != 0 {
		tablet.PortMap["vt"] = port
	}
	if gRPCPort != 0 {
		tablet.PortMap["grpc"] = gRPCPort
	}
	if err := topo.TabletComplete(tablet); err != nil {
		return fmt.Errorf("InitTablet TabletComplete failed: %v", err)
	}

	// now try to create the record
	err := topo.CreateTablet(ctx, agent.TopoServer, tablet)
	switch err {
	case nil:
		// it worked, we're good, can update the replication graph
		if topo.IsInReplicationGraph(tablet.Type) {
			if err := topo.UpdateTabletReplicationData(ctx, agent.TopoServer, tablet); err != nil {
				return fmt.Errorf("UpdateTabletReplicationData failed: %v", err)
			}
		}
	case topo.ErrNodeExists:
		// The node already exists, will just try to update
		// it. So we read it first.
		oldTablet, err := agent.TopoServer.GetTablet(ctx, tablet.Alias)
		if err != nil {
			return fmt.Errorf("InitTablet failed to read existing tablet record: %v", err)
		}

		// Sanity check the keyspace and shard
		if oldTablet.Keyspace != tablet.Keyspace || oldTablet.Shard != tablet.Shard {
			return fmt.Errorf("InitTablet failed because existing tablet keyspace and shard %v/%v differ from the provided ones %v/%v", oldTablet.Keyspace, oldTablet.Shard, tablet.Keyspace, tablet.Shard)
		}

		// And overwrite the rest
		*(oldTablet.Tablet) = *tablet
		if err := topo.UpdateTablet(ctx, agent.TopoServer, oldTablet); err != nil {
			return fmt.Errorf("UpdateTablet failed: %v", err)
		}

		// Note we don't need to UpdateTabletReplicationData
		// as the tablet already existed with the right data
		// in the replication graph
	default:
		return fmt.Errorf("CreateTablet failed: %v", err)
	}

	// and now update the serving graph. Note we do that in any case,
	// to clean any inaccurate record from any part of the serving graph.
	if tabletType != pb.TabletType_IDLE {
		if err := topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, tablet); err != nil {
			return fmt.Errorf("UpdateTabletEndpoints failed: %v", err)
		}
	}
	return nil
}
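// A hypothetical invocation sketch for the path above. The flag names
// are inferred from the variables this function reads (initKeyspace,
// initShard, initTabletType, tabletHostname, ...) and should be checked
// against the actual flag definitions before use:
//
//	vttablet \
//	    -init_keyspace test_keyspace \
//	    -init_shard 0 \
//	    -init_tablet_type replica \
//	    -port 15101 \
//	    -grpc_port 16101
//
// With these flags, the agent creates (or updates) its tablet record,
// adds its cell to the shard record under the shard lock if needed, and
// promotes itself back to master if the shard record already names it.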