Beispiel #1
0
// updateShardCellsAndMaster will update the 'Cells' and possibly
// MasterAlias records for the shard, if needed.
func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error {
	// See if we need to update the Shard:
	// - add the tablet's cell to the shard's Cells if needed
	// - change the master if needed
	shardUpdateRequired := false
	if !si.HasCell(tabletAlias.Cell) {
		shardUpdateRequired = true
	}
	if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) {
		shardUpdateRequired = true
	}
	if !shardUpdateRequired {
		return nil
	}

	// we do need to update the shard, lock it to not interfere with
	// reparenting operations.
	actionNode := actionnode.UpdateShard()
	keyspace := si.Keyspace()
	shard := si.ShardName()
	lockPath, err := wr.lockShard(ctx, keyspace, shard, actionNode)
	if err != nil {
		return err
	}

	// run the update
	_, err = wr.ts.UpdateShardFields(ctx, keyspace, shard, func(s *topodatapb.Shard) error {
		wasUpdated := false
		if !topoproto.ShardHasCell(s, tabletAlias.Cell) {
			s.Cells = append(s.Cells, tabletAlias.Cell)
			wasUpdated = true
		}

		if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) {
			if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride {
				return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), keyspace, shard)
			}
			s.MasterAlias = tabletAlias
			wasUpdated = true
		}

		if !wasUpdated {
			return topo.ErrNoUpdateNeeded
		}
		return nil
	})
	return wr.unlockShard(ctx, keyspace, shard, actionNode, lockPath, err)
}
Beispiel #2
0
// updateShardCellsAndMaster will update the 'Cells' and possibly
// MasterAlias records for the shard, if needed.
func (wr *Wrangler) updateShardCellsAndMaster(ctx context.Context, si *topo.ShardInfo, tabletAlias *topodatapb.TabletAlias, tabletType topodatapb.TabletType, allowMasterOverride bool) error {
	// See if we need to update the Shard:
	// - add the tablet's cell to the shard's Cells if needed
	// - change the master if needed
	shardUpdateRequired := false
	if !si.HasCell(tabletAlias.Cell) {
		shardUpdateRequired = true
	}
	if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(si.MasterAlias, tabletAlias) {
		shardUpdateRequired = true
	}
	if !shardUpdateRequired {
		return nil
	}

	// run the update
	_, err := wr.ts.UpdateShardFields(ctx, si.Keyspace(), si.ShardName(), func(s *topo.ShardInfo) error {
		wasUpdated := false
		if !s.HasCell(tabletAlias.Cell) {
			s.Cells = append(s.Cells, tabletAlias.Cell)
			wasUpdated = true
		}

		if tabletType == topodatapb.TabletType_MASTER && !topoproto.TabletAliasEqual(s.MasterAlias, tabletAlias) {
			if !topoproto.TabletAliasIsZero(s.MasterAlias) && !allowMasterOverride {
				return fmt.Errorf("creating this tablet would override old master %v in shard %v/%v", topoproto.TabletAliasString(s.MasterAlias), si.Keyspace(), si.ShardName())
			}
			s.MasterAlias = tabletAlias
			wasUpdated = true
		}

		if !wasUpdated {
			return topo.ErrNoUpdateNeeded
		}
		return nil
	})
	return err
}
Beispiel #3
0
// HasMaster returns true if the Shard has an assigned Master.
func (si *ShardInfo) HasMaster() bool {
	return !topoproto.TabletAliasIsZero(si.Shard.MasterAlias)
}
Beispiel #4
0
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation
// tasks that ensure topology is self-consistent, and then marks the reparent as
// finished by updating the global shard record.
func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) {
	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder
	oldMasterAlias := si.MasterAlias

	// Update the tablet records and serving graph for the old and new master concurrently.
	event.DispatchUpdate(ev, "updating old and new master tablet records")
	log.Infof("finalizeTabletExternallyReparented: updating tablet records")
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Update our own record to master.
		updatedTablet, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias,
			func(tablet *topodatapb.Tablet) error {
				tablet.Type = topodatapb.TabletType_MASTER
				tablet.HealthMap = nil
				return nil
			})
		if err != nil {
			errs.RecordError(err)
			return
		}

		// Update the serving graph for the tablet.
		if updatedTablet != nil {
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, updatedTablet))
		}
	}()

	if !topoproto.TabletAliasIsZero(oldMasterAlias) {
		wg.Add(1)
		go func() {
			// Forcibly demote the old master in topology, since we can't rely on the
			// old master to be up to change its own record.
			oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias,
				func(tablet *topodatapb.Tablet) error {
					tablet.Type = topodatapb.TabletType_SPARE
					return nil
				})
			if err != nil {
				errs.RecordError(err)
				wg.Done()
				return
			}

			// We now know more about the old master, so add it to event data.
			ev.OldMaster = *oldMasterTablet

			// Update the serving graph.
			errs.RecordError(
				topotools.UpdateTabletEndpoints(ctx, agent.TopoServer, oldMasterTablet))
			wg.Done()

			// Tell the old master to re-read its tablet record and change its state.
			// We don't need to wait for it.
			tmc := tmclient.NewTabletManagerClient()
			tmc.RefreshState(ctx, topo.NewTabletInfo(oldMasterTablet, -1))
		}()
	}

	tablet := agent.Tablet()

	// Wait for the tablet records to be updated. At that point, any rebuild will
	// see the new master, so we're ready to mark the reparent as done in the
	// global shard record.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Update the master field in the global shard record. We don't use a lock
	// here anymore. The lock was only to ensure that the global shard record
	// didn't get modified between the time when we read it and the time when we
	// write it back. Now we use an update loop pattern to do that instead.
	event.DispatchUpdate(ev, "updating global shard record")
	log.Infof("finalizeTabletExternallyReparented: updating global shard record")
	si, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(shard *topodatapb.Shard) error {
		shard.MasterAlias = tablet.Alias
		return nil
	})
	if err != nil {
		return err
	}

	// We already took care of updating the serving graph for the old and new masters.
	// All that's left now is in case of a cross-cell reparent, we need to update the
	// master cell setting in the SrvShard records of all cells.
	if oldMasterAlias == nil || oldMasterAlias.Cell != tablet.Alias.Cell {
		event.DispatchUpdate(ev, "rebuilding shard serving graph")
		log.Infof("finalizeTabletExternallyReparented: updating SrvShard in all cells for cross-cell reparent")
		if err := topotools.UpdateAllSrvShards(ctx, agent.TopoServer, si); err != nil {
			return err
		}
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}
Beispiel #5
0
func (wr *Wrangler) applySchemaShardComplex(ctx context.Context, statusArray []*tabletStatus, shardInfo *topo.ShardInfo, preflight *myproto.SchemaChangeResult, masterTabletAlias *pb.TabletAlias, change string, newParentTabletAlias *pb.TabletAlias, force bool, waitSlaveTimeout time.Duration) (*myproto.SchemaChangeResult, error) {
	// apply the schema change to all replica / slave tablets
	for _, status := range statusArray {
		// if already applied, we skip this guy
		diffs := myproto.DiffSchemaToArray("after", preflight.AfterSchema, topoproto.TabletAliasString(status.ti.Alias), status.beforeSchema)
		if len(diffs) == 0 {
			log.Infof("Tablet %v already has the AfterSchema, skipping", status.ti.AliasString())
			continue
		}

		// make sure the before schema matches
		diffs = myproto.DiffSchemaToArray("master", preflight.BeforeSchema, topoproto.TabletAliasString(status.ti.Alias), status.beforeSchema)
		if len(diffs) > 0 {
			if force {
				log.Warningf("Tablet %v has inconsistent schema, ignoring: %v", status.ti.AliasString(), strings.Join(diffs, "\n"))
			} else {
				return nil, fmt.Errorf("Tablet %v has inconsistent schema: %v", status.ti.AliasString(), strings.Join(diffs, "\n"))
			}
		}

		// take this guy out of the serving graph if necessary
		ti, err := wr.ts.GetTablet(ctx, status.ti.Alias)
		if err != nil {
			return nil, err
		}
		typeChangeRequired := ti.IsInServingGraph()
		if typeChangeRequired {
			// note we want to update the serving graph there
			err = wr.changeTypeInternal(ctx, ti.Alias, pb.TabletType_SCHEMA_UPGRADE)
			if err != nil {
				return nil, err
			}
		}

		// apply the schema change
		log.Infof("Applying schema change to slave %v in complex mode", status.ti.AliasString())
		sc := &myproto.SchemaChange{Sql: change, Force: force, AllowReplication: false, BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}
		_, err = wr.ApplySchema(ctx, status.ti.Alias, sc)
		if err != nil {
			return nil, err
		}

		// put this guy back into the serving graph
		if typeChangeRequired {
			err = wr.changeTypeInternal(ctx, ti.Alias, ti.Tablet.Type)
			if err != nil {
				return nil, err
			}
		}
	}

	// if newParentTabletAlias is passed in, use that as the new master
	if !topoproto.TabletAliasIsZero(newParentTabletAlias) {
		log.Infof("Reparenting with new master set to %v", topoproto.TabletAliasString(newParentTabletAlias))
		oldMasterAlias := shardInfo.MasterAlias

		// Create reusable Reparent event with available info
		ev := &events.Reparent{}

		if err := wr.plannedReparentShardLocked(ctx, ev, shardInfo.Keyspace(), shardInfo.ShardName(), newParentTabletAlias, waitSlaveTimeout); err != nil {
			return nil, err
		}

		// Here we would apply the schema change to the old
		// master, but we just scrap it, to be consistent
		// with the previous implementation of the reparent.
		// (this code will be refactored at some point anyway)
		if err := wr.Scrap(ctx, oldMasterAlias, false, false); err != nil {
			wr.Logger().Warningf("Scrapping old master %v from shard %v/%v failed: %v", topoproto.TabletAliasString(oldMasterAlias), shardInfo.Keyspace(), shardInfo.ShardName(), err)
		}
	}
	return &myproto.SchemaChangeResult{BeforeSchema: preflight.BeforeSchema, AfterSchema: preflight.AfterSchema}, nil
}
Beispiel #6
0
// finalizeTabletExternallyReparented performs slow, synchronized reconciliation
// tasks that ensure topology is self-consistent, and then marks the reparent as
// finished by updating the global shard record.
func (agent *ActionAgent) finalizeTabletExternallyReparented(ctx context.Context, si *topo.ShardInfo, ev *events.Reparent) (err error) {
	var wg sync.WaitGroup
	var errs concurrency.AllErrorRecorder
	oldMasterAlias := si.MasterAlias

	// Update the tablet records concurrently.
	event.DispatchUpdate(ev, "updating old and new master tablet records")
	log.Infof("finalizeTabletExternallyReparented: updating tablet records")
	wg.Add(1)
	go func() {
		defer wg.Done()
		// Update our own record to master.
		_, err := agent.TopoServer.UpdateTabletFields(ctx, agent.TabletAlias,
			func(tablet *topodatapb.Tablet) error {
				tablet.Type = topodatapb.TabletType_MASTER
				return nil
			})
		if err != nil {
			errs.RecordError(err)
		}
	}()

	if !topoproto.TabletAliasIsZero(oldMasterAlias) {
		wg.Add(1)
		go func() {
			// Forcibly demote the old master in topology, since we can't rely on the
			// old master to be up to change its own record.
			oldMasterTablet, err := agent.TopoServer.UpdateTabletFields(ctx, oldMasterAlias,
				func(tablet *topodatapb.Tablet) error {
					tablet.Type = topodatapb.TabletType_REPLICA
					return nil
				})
			if err != nil {
				errs.RecordError(err)
				wg.Done()
				return
			}

			// We now know more about the old master, so add it to event data.
			ev.OldMaster = *oldMasterTablet
			wg.Done()

			// Tell the old master to re-read its tablet record and change its state.
			// We don't need to wait for it.
			tmc := tmclient.NewTabletManagerClient()
			tmc.RefreshState(ctx, oldMasterTablet)
		}()
	}

	tablet := agent.Tablet()

	// Wait for the tablet records to be updated. At that point, any rebuild will
	// see the new master, so we're ready to mark the reparent as done in the
	// global shard record.
	wg.Wait()
	if errs.HasErrors() {
		return errs.Error()
	}

	// Update the master field in the global shard record. We don't use a lock
	// here anymore. The lock was only to ensure that the global shard record
	// didn't get modified between the time when we read it and the time when we
	// write it back. Now we use an update loop pattern to do that instead.
	event.DispatchUpdate(ev, "updating global shard record")
	log.Infof("finalizeTabletExternallyReparented: updating global shard record if needed")
	_, err = agent.TopoServer.UpdateShardFields(ctx, tablet.Keyspace, tablet.Shard, func(si *topo.ShardInfo) error {
		if topoproto.TabletAliasEqual(si.MasterAlias, tablet.Alias) {
			return topo.ErrNoUpdateNeeded
		}
		si.MasterAlias = tablet.Alias
		return nil
	})
	if err != nil {
		return err
	}

	event.DispatchUpdate(ev, "finished")
	return nil
}