// InitMaster breaks slaves replication, get the current MySQL replication // position, insert a row in the reparent_journal table, and returns // the replication position func (agent *ActionAgent) InitMaster(ctx context.Context) (myproto.ReplicationPosition, error) { // we need to insert something in the binlogs, so we can get the // current position. Let's just use the mysqlctl.CreateReparentJournal commands. cmds := mysqlctl.CreateReparentJournal() if err := agent.MysqlDaemon.ExecuteSuperQueryList(cmds); err != nil { return myproto.ReplicationPosition{}, err } // get the current replication position rp, err := agent.MysqlDaemon.MasterPosition() if err != nil { return myproto.ReplicationPosition{}, err } // Set the server read-write, from now on we can accept real // client writes. Note that if semi-sync replication is enabled, // we'll still need some slaves to be able to commit // transactions. if err := agent.MysqlDaemon.SetReadOnly(false); err != nil { return myproto.ReplicationPosition{}, err } // Change our type to master if not already if err := topo.UpdateTabletFields(ctx, agent.TopoServer, agent.TabletAlias, func(tablet *topo.Tablet) error { tablet.Type = topo.TYPE_MASTER tablet.Health = nil return nil }); err != nil { return myproto.ReplicationPosition{}, err } agent.initReplication = true return rp, nil }
// CheckTablet verifies the topo server API is correct for managing tablets. func CheckTablet(ctx context.Context, t *testing.T, ts topo.Server) { cell := getLocalCell(ctx, t, ts) tablet := &topo.Tablet{ Alias: topo.TabletAlias{Cell: cell, Uid: 1}, Hostname: "localhost", IPAddr: "10.11.12.13", Portmap: map[string]int{ "vt": 3333, "mysql": 3334, }, Tags: map[string]string{"tag": "value"}, Keyspace: "test_keyspace", Type: topo.TYPE_MASTER, KeyRange: newKeyRange("-10"), } if err := ts.CreateTablet(ctx, tablet); err != nil { t.Errorf("CreateTablet: %v", err) } if err := ts.CreateTablet(ctx, tablet); err != topo.ErrNodeExists { t.Errorf("CreateTablet(again): %v", err) } if _, err := ts.GetTablet(ctx, topo.TabletAlias{Cell: cell, Uid: 666}); err != topo.ErrNoNode { t.Errorf("GetTablet(666): %v", err) } ti, err := ts.GetTablet(ctx, tablet.Alias) if err != nil { t.Errorf("GetTablet %v: %v", tablet.Alias, err) } if eq, err := tabletEqual(ti.Tablet, tablet); err != nil { t.Errorf("cannot compare tablets: %v", err) } else if !eq { t.Errorf("put and got tablets are not identical:\n%#v\n%#v", tablet, ti.Tablet) } if _, err := ts.GetTabletsByCell(ctx, "666"); err != topo.ErrNoNode { t.Errorf("GetTabletsByCell(666): %v", err) } inCell, err := ts.GetTabletsByCell(ctx, cell) if err != nil { t.Errorf("GetTabletsByCell: %v", err) } if len(inCell) != 1 || inCell[0] != tablet.Alias { t.Errorf("GetTabletsByCell: want [%v], got %v", tablet.Alias, inCell) } ti.Hostname = "remotehost" if err := topo.UpdateTablet(ctx, ts, ti); err != nil { t.Errorf("UpdateTablet: %v", err) } ti, err = ts.GetTablet(ctx, tablet.Alias) if err != nil { t.Errorf("GetTablet %v: %v", tablet.Alias, err) } if want := "remotehost"; ti.Hostname != want { t.Errorf("ti.Hostname: want %v, got %v", want, ti.Hostname) } if err := topo.UpdateTabletFields(ctx, ts, tablet.Alias, func(t *topo.Tablet) error { t.Hostname = "anotherhost" return nil }); err != nil { t.Errorf("UpdateTabletFields: %v", err) } ti, err = ts.GetTablet(ctx, tablet.Alias) if err != nil { t.Errorf("GetTablet %v: %v", tablet.Alias, err) } if want := "anotherhost"; ti.Hostname != want { t.Errorf("ti.Hostname: want %v, got %v", want, ti.Hostname) } if err := ts.DeleteTablet(ctx, tablet.Alias); err != nil { t.Errorf("DeleteTablet: %v", err) } if err := ts.DeleteTablet(ctx, tablet.Alias); err != topo.ErrNoNode { t.Errorf("DeleteTablet(again): %v", err) } if _, err := ts.GetTablet(ctx, tablet.Alias); err != topo.ErrNoNode { t.Errorf("GetTablet: expected error, tablet was deleted: %v", err) } }
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation // tasks that ensure topology is self-consistent, and then marks the reparent as // finished by updating the global shard record. func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) { var wg sync.WaitGroup var errs concurrency.AllErrorRecorder oldMasterAlias := si.MasterAlias // Update the tablet records and serving graph for the old and new master concurrently. event.DispatchUpdate(ev, "updating old and new master tablet records") log.Infof("finalizeTabletExternallyReparented: updating tablet records") wg.Add(1) go func() { defer wg.Done() // Update our own record to master. var updatedTablet *topo.Tablet err := topo.UpdateTabletFields(ctx, agent.TopoServer, agent.TabletAlias, func(tablet *topo.Tablet) error { tablet.Type = topo.TYPE_MASTER tablet.Health = nil updatedTablet = tablet return nil }) if err != nil { errs.RecordError(err) return } // Update the serving graph for the tablet. if updatedTablet != nil { errs.RecordError( topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet)) } }() if !oldMasterAlias.IsZero() { wg.Add(1) go func() { // Force the old master to spare. var oldMasterTablet *topo.Tablet err := topo.UpdateTabletFields(ctx, agent.TopoServer, oldMasterAlias, func(tablet *topo.Tablet) error { tablet.Type = topo.TYPE_SPARE oldMasterTablet = tablet return nil }) if err != nil { errs.RecordError(err) wg.Done() return } if oldMasterTablet != nil { // Update the serving graph. errs.RecordError( topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet)) wg.Done() // Tell the old master to refresh its state. We don't need to wait for it. tmc := tmclient.NewTabletManagerClient() tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1)) } }() } tablet := agent.Tablet() // Wait for the tablet records to be updated. At that point, any rebuild will // see the new master, so we're ready to mark the reparent as done in the // global shard record. wg.Wait() if errs.HasErrors() { return errs.Error() } // Update the master field in the global shard record. We don't use a lock // here anymore. The lock was only to ensure that the global shard record // didn't get modified between the time when we read it and the time when we // write it back. Now we use an update loop pattern to do that instead. event.DispatchUpdate(ev, "updating global shard record") log.Infof("finalizeTabletExternallyReparented: updating global shard record") si, err = topo.UpdateShardFields(ctx, agent.TopoServer, tablet.Keyspace, tablet.Shard, func(shard *topo.Shard) error { shard.MasterAlias = tablet.Alias return nil }) if err != nil { return err } // We already took care of updating the serving graph for the old and new masters. // All that's left now is in case of a cross-cell reparent, we need to update the // master cell setting in the SrvShard records of all cells. if oldMasterAlias.Cell != tablet.Alias.Cell { event.DispatchUpdate(ev, "rebuilding shard serving graph") log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent") if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }