// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } // we do need to update the shard, lock it to not interfere with // reparenting operations. actionNode := actionnode.UpdateShard() keyspace := si.Keyspace() shard := si.ShardName() lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode) if err != nil { return err } // run the update _, err = wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error { wasUpdated := false if !topoproto.ShardHasCell(s, tabletAlias.Cell) { s.Cells = append(s.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), keyspace, shard) } s.MasterAlias = tabletAlias wasUpdated = true } if !wasUpdated { return topo.ErrNoUpdateNeeded } return nil }) return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err) }
// updateShardCellsAndMaster will update the 'Cells' and possibly // MasterAlias records for the shard, if needed. func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error { // See if we need to update the Shard: // - add the tablet's cell to the shard's Cells if needed // - change the master if needed shardUpdateRequired := false if !si.HasCell(tabletAlias.Cell) { shardUpdateRequired = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) { shardUpdateRequired = true } if !shardUpdateRequired { return nil } // run the update _, err := wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(s *topo.ShardInfo) error { wasUpdated := false if !s.HasCell(tabletAlias.Cell) { s.Cells = append(s.Cells, tabletAlias.Cell) wasUpdated = true } if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) { if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride { return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), si.Keyspace(), si.ShardName()) } s.MasterAlias = tabletAlias wasUpdated = true } if !wasUpdated { return topo.ErrNoUpdateNeeded } return nil }) return err }
// HasMaster returns true if the Shard has an assigned Master. func (si *ShardInfo) HasMaster() bool { return !topoproto.TabletAliasIsZero(si.Shard.MasterAlias) }
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation // tasks that ensure topology is self-consistent, and then marks the reparent as // finished by updating the global shard record. func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) { var wg sync.WaitGroup var errs concurrency.AllErrorRecorder oldMasterAlias := si.MasterAlias // Update the tablet records and serving graph for the old and new master concurrently. event.DispatchUpdate(ev, "updating old and new master tablet records") log.Infof("finalizeTabletExternallyReparented: updating tablet records") wg.Add(1) go func() { defer wg.Done() // Update our own record to master. updatedTablet, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_MASTER tablet.HealthMap = nil return nil }) if err != nil { errs.RecordError(err) return } // Update the serving graph for the tablet. if updatedTablet != nil { errs.RecordError( topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet)) } }() if !topoproto.TabletAliasIsZero(oldMasterAlias) { wg.Add(1) go func() { // Forcibly demote the old master in topology, since we can't rely on the // old master to be up to change its own record. oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_SPARE return nil }) if err != nil { errs.RecordError(err) wg.Done() return } // We now know more about the old master, so add it to event data. ev.OldMaster = *oldMasterTablet // Update the serving graph. errs.RecordError( topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet)) wg.Done() // Tell the old master to re-read its tablet record and change its state. // We don't need to wait for it. tmc := tmclient.NewTabletManagerClient() tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1)) }() } tablet := agent.Tablet() // Wait for the tablet records to be updated. At that point, any rebuild will // see the new master, so we're ready to mark the reparent as done in the // global shard record. wg.Wait() if errs.HasErrors() { return errs.Error() } // Update the master field in the global shard record. We don't use a lock // here anymore. The lock was only to ensure that the global shard record // didn't get modified between the time when we read it and the time when we // write it back. Now we use an update loop pattern to do that instead. event.DispatchUpdate(ev, "updating global shard record") log.Infof("finalizeTabletExternallyReparented: updating global shard record") si, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(shard *topodatapb.Shard) error { shard.MasterAlias = tablet.Alias return nil }) if err != nil { return err } // We already took care of updating the serving graph for the old and new masters. // All that's left now is in case of a cross-cell reparent, we need to update the // master cell setting in the SrvShard records of all cells. if oldMasterAlias == nil || oldMasterAlias.Cell != tablet.Alias.Cell { event.DispatchUpdate(ev, "rebuilding shard serving graph") log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent") if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil { return err } } event.DispatchUpdate(ev, "finished") return nil }
func (wr *Wrangler) applySchemaShardComplex(ctx context.Context, statusArray []*tabletStatus, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) { // apply the schema change to all replica / slave tablets for _, status := range statusArray { // if already applied, we skip this guy diffs := myproto.DiffSchemaToArray("after", preflight.AfterSchema, topoproto.TabletAliasString(status.ti.Alias), status.beforeSchema) if len(diffs) == 0 { log.Infof("Tablet %v already has the AfterSchema, skipping", status.ti.AliasString()) continue } // make sure the before schema matches diffs = myproto.DiffSchemaToArray("master", preflight.BeforeSchema, topoproto.TabletAliasString(status.ti.Alias), status.beforeSchema) if len(diffs) > 0 { if force { log.Warningf("Tablet %v has inconsistent schema, ignoring: %v", status.ti.AliasString(), strings.Join(diffs, "\n")) } else { return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.AliasString(), strings.Join(diffs, "\n")) } } // take this guy out of the serving graph if necessary ti, err := wr.ts.GetTablet(ctx, status.ti.Alias) if err != nil { return nil, err } typeChangeRequired := ti.IsInServingGraph() if typeChangeRequired { // note we want to update the serving graph there err = wr.changeTypeInternal(ctx, ti.Alias, pb.TabletType_SCHEMA_UPGRADE) if err != nil { return nil, err } } // apply the schema change log.Infof("Applying schema change to slave %v in complex mode", status.ti.AliasString()) sc := &myproto.SchemaChange{Sql: change, Force: force, AllowReplication: false, BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema} _, err = wr.ApplySchema(ctx, status.ti.Alias, sc) if err != nil { return nil, err } // put this guy back into the serving graph if typeChangeRequired { err = wr.changeTypeInternal(ctx, ti.Alias, ti.Tablet.Type) if err != nil { return nil, err } } } // if newParentTabletAlias is passed in, use that as the new master if !topoproto.TabletAliasIsZero(newParentTabletAlias) { log.Infof("Reparenting with new master set to %v", topoproto.TabletAliasString(newParentTabletAlias)) oldMasterAlias := shardInfo.MasterAlias // Create reusable Reparent event with available info ev := &events.Reparent{} if err := wr.plannedReparentShardLocked(ctx, ev, shardInfo.Keyspace(), shardInfo.ShardName(), newParentTabletAlias, waitSlaveTimeout); err != nil { return nil, err } // Here we would apply the schema change to the old // master, but we just scrap it, to be consistent // with the previous implementation of the reparent. // (this code will be refactored at some point anyway) if err := wr.Scrap(ctx, oldMasterAlias, false, false); err != nil { wr.Logger().Warningf("Scrapping old master %v from shard %v/%v failed: %v", topoproto.TabletAliasString(oldMasterAlias), shardInfo.Keyspace(), shardInfo.ShardName(), err) } } return &myproto.SchemaChangeResult{BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}, nil }
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation // tasks that ensure topology is self-consistent, and then marks the reparent as // finished by updating the global shard record. func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) { var wg sync.WaitGroup var errs concurrency.AllErrorRecorder oldMasterAlias := si.MasterAlias // Update the tablet records concurrently. event.DispatchUpdate(ev, "updating old and new master tablet records") log.Infof("finalizeTabletExternallyReparented: updating tablet records") wg.Add(1) go func() { defer wg.Done() // Update our own record to master. _, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_MASTER return nil }) if err != nil { errs.RecordError(err) } }() if !topoproto.TabletAliasIsZero(oldMasterAlias) { wg.Add(1) go func() { // Forcibly demote the old master in topology, since we can't rely on the // old master to be up to change its own record. oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias, func(tablet *topodatapb.Tablet) error { tablet.Type = topodatapb.TabletType_REPLICA return nil }) if err != nil { errs.RecordError(err) wg.Done() return } // We now know more about the old master, so add it to event data. ev.OldMaster = *oldMasterTablet wg.Done() // Tell the old master to re-read its tablet record and change its state. // We don't need to wait for it. tmc := tmclient.NewTabletManagerClient() tmc.RefreshState(ctx, oldMasterTablet) }() } tablet := agent.Tablet() // Wait for the tablet records to be updated. At that point, any rebuild will // see the new master, so we're ready to mark the reparent as done in the // global shard record. wg.Wait() if errs.HasErrors() { return errs.Error() } // Update the master field in the global shard record. We don't use a lock // here anymore. The lock was only to ensure that the global shard record // didn't get modified between the time when we read it and the time when we // write it back. Now we use an update loop pattern to do that instead. event.DispatchUpdate(ev, "updating global shard record") log.Infof("finalizeTabletExternallyReparented: updating global shard record if needed") _, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(si *topo.ShardInfo) error { if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) { return topo.ErrNoUpdateNeeded } si.MasterAlias = tablet.Alias return nil }) if err != nil { return err } event.DispatchUpdate(ev, "finished") return nil }