// helper method to asynchronously diff a schema func (wr *Wrangler) diffSchema(ctx context.Context, masterSchema *tabletmanagerdatapb.SchemaDefinition, masterTabletAlias, alias *topodatapb.TabletAlias, excludeTables []string, includeViews bool, wg *sync.WaitGroup, er concurrency.ErrorRecorder) { defer wg.Done() log.Infof("Gathering schema for %v", topoproto.TabletAliasString(alias)) slaveSchema, err := wr.GetSchema(ctx, alias, nil, excludeTables, includeViews) if err != nil { er.RecordError(err) return } log.Infof("Diffing schema for %v", topoproto.TabletAliasString(alias)) tmutils.DiffSchema(topoproto.TabletAliasString(masterTabletAlias), masterSchema, topoproto.TabletAliasString(alias), slaveSchema, er) }
func (sdw *SplitDiffWorker) diff(ctx context.Context) error { sdw.SetState(WorkerStateDiff) sdw.wr.Logger().Infof("Gathering schema information...") sdw.sourceSchemaDefinitions = make([]*pbt.SchemaDefinition, len(sdw.sourceAliases)) wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema( shortCtx, sdw.destinationAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias) wg.Done() }() for i, sourceAlias := range sdw.sourceAliases { wg.Add(1) go func(i int, sourceAlias pb.TabletAlias) { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sdw.sourceSchemaDefinitions[i], err = sdw.wr.GetSchema( shortCtx, &sourceAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from source[%v] %v", i, sourceAlias) wg.Done() }(i, *sourceAlias) } wg.Wait() if rec.HasErrors() { return rec.Error() } // TODO(alainjobart) Checking against each source may be // overkill, if all sources have the same schema? sdw.wr.Logger().Infof("Diffing the schema...") rec = concurrency.AllErrorRecorder{} for i, sourceSchemaDefinition := range sdw.sourceSchemaDefinitions { sourceName := fmt.Sprintf("source[%v]", i) tmutils.DiffSchema("destination", sdw.destinationSchemaDefinition, sourceName, sourceSchemaDefinition, &rec) } if rec.HasErrors() { sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error()) } else { sdw.wr.Logger().Infof("Schema match, good.") } // run the diffs, 8 at a time sdw.wr.Logger().Infof("Running the diffs...") sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *pbt.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) if len(sdw.sourceAliases) != 1 { err := fmt.Errorf("Don't support more than one source for table yet: %v", tableDefinition.Name) rec.RecordError(err) sdw.wr.Logger().Errorf(err.Error()) return } overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[0].KeyRange) if err != nil { newErr := fmt.Errorf("Source shard doesn't overlap with destination????: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } sourceQueryResultReader, err := TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAliases[0], tableDefinition, overlap, sdw.keyspaceInfo.ShardingColumnType) if err != nil { newErr := fmt.Errorf("TableScanByKeyRange(source) failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } defer sourceQueryResultReader.Close() destinationQueryResultReader, err := TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, nil, sdw.keyspaceInfo.ShardingColumnType) if err != nil { newErr := fmt.Errorf("TableScanByKeyRange(destination) failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } defer destinationQueryResultReader.Close() differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { newErr := fmt.Errorf("NewRowDiffer() failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) return } report, err := differ.Go(sdw.wr.Logger()) if err != nil { newErr := fmt.Errorf("Differ.Go failed: %v", err.Error()) rec.RecordError(newErr) sdw.wr.Logger().Errorf(newErr.Error()) } else { if report.HasDifferences() { err := fmt.Errorf("Table %v has differences: %v", tableDefinition.Name, report.String()) rec.RecordError(err) sdw.wr.Logger().Warningf(err.Error()) } else { sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return rec.Error() }
func (vsdw *VerticalSplitDiffWorker) diff(ctx context.Context) error { vsdw.SetState(WorkerStateDiff) vsdw.wr.Logger().Infof("Gathering schema information...") wg := sync.WaitGroup{} rec := &concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) vsdw.destinationSchemaDefinition, err = vsdw.wr.GetSchema( shortCtx, vsdw.destinationAlias, vsdw.shardInfo.SourceShards[0].Tables, nil /* excludeTables */, false /* includeViews */) cancel() rec.RecordError(err) vsdw.wr.Logger().Infof("Got schema from destination %v", topoproto.TabletAliasString(vsdw.destinationAlias)) wg.Done() }() wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) vsdw.sourceSchemaDefinition, err = vsdw.wr.GetSchema( shortCtx, vsdw.sourceAlias, vsdw.shardInfo.SourceShards[0].Tables, nil /* excludeTables */, false /* includeViews */) cancel() rec.RecordError(err) vsdw.wr.Logger().Infof("Got schema from source %v", topoproto.TabletAliasString(vsdw.sourceAlias)) wg.Done() }() wg.Wait() if rec.HasErrors() { return rec.Error() } // Check the schema vsdw.wr.Logger().Infof("Diffing the schema...") rec = &concurrency.AllErrorRecorder{} tmutils.DiffSchema("destination", vsdw.destinationSchemaDefinition, "source", vsdw.sourceSchemaDefinition, rec) if rec.HasErrors() { vsdw.wr.Logger().Warningf("Different schemas: %v", rec.Error()) } else { vsdw.wr.Logger().Infof("Schema match, good.") } // run the diffs, 8 at a time vsdw.wr.Logger().Infof("Running the diffs...") sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range vsdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *tabletmanagerdatapb.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() vsdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) sourceQueryResultReader, err := TableScan(ctx, vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.sourceAlias, tableDefinition) if err != nil { newErr := fmt.Errorf("TableScan(source) failed: %v", err) rec.RecordError(newErr) vsdw.wr.Logger().Errorf("%v", newErr) return } defer sourceQueryResultReader.Close() destinationQueryResultReader, err := TableScan(ctx, vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.destinationAlias, tableDefinition) if err != nil { newErr := fmt.Errorf("TableScan(destination) failed: %v", err) rec.RecordError(newErr) vsdw.wr.Logger().Errorf("%v", newErr) return } defer destinationQueryResultReader.Close() differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { newErr := fmt.Errorf("NewRowDiffer() failed: %v", err) rec.RecordError(newErr) vsdw.wr.Logger().Errorf("%v", newErr) return } report, err := differ.Go(vsdw.wr.Logger()) if err != nil { vsdw.wr.Logger().Errorf("Differ.Go failed: %v", err) } else { if report.HasDifferences() { err := fmt.Errorf("Table %v has differences: %v", tableDefinition.Name, report.String()) rec.RecordError(err) vsdw.wr.Logger().Errorf("%v", err) } else { vsdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return rec.Error() }
func (vsdw *VerticalSplitDiffWorker) diff(ctx context.Context) error { vsdw.SetState(WorkerStateDiff) vsdw.wr.Logger().Infof("Gathering schema information...") wg := sync.WaitGroup{} rec := concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) vsdw.destinationSchemaDefinition, err = vsdw.wr.GetSchema( shortCtx, vsdw.destinationAlias, nil /* tables */, vsdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) vsdw.wr.Logger().Infof("Got schema from destination %v", topoproto.TabletAliasString(vsdw.destinationAlias)) wg.Done() }() wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) vsdw.sourceSchemaDefinition, err = vsdw.wr.GetSchema( shortCtx, vsdw.sourceAlias, nil /* tables */, vsdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) vsdw.wr.Logger().Infof("Got schema from source %v", topoproto.TabletAliasString(vsdw.sourceAlias)) wg.Done() }() wg.Wait() if rec.HasErrors() { return rec.Error() } // Build a list of regexp to exclude tables from source schema tableRegexps := make([]*regexp.Regexp, len(vsdw.shardInfo.SourceShards[0].Tables)) for i, table := range vsdw.shardInfo.SourceShards[0].Tables { var err error tableRegexps[i], err = regexp.Compile(table) if err != nil { return fmt.Errorf("cannot compile regexp %v for table: %v", table, err) } } // Remove the tables we don't need from the source schema newSourceTableDefinitions := make([]*tabletmanagerdatapb.TableDefinition, 0, len(vsdw.destinationSchemaDefinition.TableDefinitions)) for _, tableDefinition := range vsdw.sourceSchemaDefinition.TableDefinitions { found := false for _, tableRegexp := range tableRegexps { if tableRegexp.MatchString(tableDefinition.Name) { found = true break } } if !found { vsdw.wr.Logger().Infof("Removing table %v from source schema", tableDefinition.Name) continue } newSourceTableDefinitions = append(newSourceTableDefinitions, tableDefinition) } vsdw.sourceSchemaDefinition.TableDefinitions = newSourceTableDefinitions // Check the schema vsdw.wr.Logger().Infof("Diffing the schema...") rec = concurrency.AllErrorRecorder{} tmutils.DiffSchema("destination", vsdw.destinationSchemaDefinition, "source", vsdw.sourceSchemaDefinition, &rec) if rec.HasErrors() { vsdw.wr.Logger().Warningf("Different schemas: %v", rec.Error()) } else { vsdw.wr.Logger().Infof("Schema match, good.") } // run the diffs, 8 at a time vsdw.wr.Logger().Infof("Running the diffs...") sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range vsdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *tabletmanagerdatapb.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() vsdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) sourceQueryResultReader, err := TableScan(ctx, vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.sourceAlias, tableDefinition) if err != nil { newErr := fmt.Errorf("TableScan(source) failed: %v", err) rec.RecordError(newErr) vsdw.wr.Logger().Errorf(newErr.Error()) return } defer sourceQueryResultReader.Close() destinationQueryResultReader, err := TableScan(ctx, vsdw.wr.Logger(), vsdw.wr.TopoServer(), vsdw.destinationAlias, tableDefinition) if err != nil { newErr := fmt.Errorf("TableScan(destination) failed: %v", err) rec.RecordError(newErr) vsdw.wr.Logger().Errorf(newErr.Error()) return } defer destinationQueryResultReader.Close() differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { newErr := fmt.Errorf("NewRowDiffer() failed: %v", err) rec.RecordError(newErr) vsdw.wr.Logger().Errorf(newErr.Error()) return } report, err := differ.Go(vsdw.wr.Logger()) if err != nil { vsdw.wr.Logger().Errorf("Differ.Go failed: %v", err) } else { if report.HasDifferences() { err := fmt.Errorf("Table %v has differences: %v", tableDefinition.Name, report.String()) rec.RecordError(err) vsdw.wr.Logger().Errorf(err.Error()) } else { vsdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return rec.Error() }
func (sdw *SplitDiffWorker) diff(ctx context.Context) error { sdw.SetState(WorkerStateDiff) sdw.wr.Logger().Infof("Gathering schema information...") wg := sync.WaitGroup{} rec := &concurrency.AllErrorRecorder{} wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sdw.destinationSchemaDefinition, err = sdw.wr.GetSchema( shortCtx, sdw.destinationAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from destination %v", sdw.destinationAlias) wg.Done() }() wg.Add(1) go func() { var err error shortCtx, cancel := context.WithTimeout(ctx, *remoteActionsTimeout) sdw.sourceSchemaDefinition, err = sdw.wr.GetSchema( shortCtx, sdw.sourceAlias, nil /* tables */, sdw.excludeTables, false /* includeViews */) cancel() rec.RecordError(err) sdw.wr.Logger().Infof("Got schema from source %v", sdw.sourceAlias) wg.Done() }() wg.Wait() if rec.HasErrors() { return rec.Error() } sdw.wr.Logger().Infof("Diffing the schema...") rec = &concurrency.AllErrorRecorder{} tmutils.DiffSchema("destination", sdw.destinationSchemaDefinition, "source", sdw.sourceSchemaDefinition, rec) if rec.HasErrors() { sdw.wr.Logger().Warningf("Different schemas: %v", rec.Error().Error()) } else { sdw.wr.Logger().Infof("Schema match, good.") } // read the vschema if needed var keyspaceSchema *vindexes.KeyspaceSchema if *useV3ReshardingMode { kschema, err := sdw.wr.TopoServer().GetVSchema(ctx, sdw.keyspace) if err != nil { return fmt.Errorf("cannot load VSchema for keyspace %v: %v", sdw.keyspace, err) } if kschema == nil { return fmt.Errorf("no VSchema for keyspace %v", sdw.keyspace) } keyspaceSchema, err = vindexes.BuildKeyspaceSchema(kschema, sdw.keyspace) if err != nil { return fmt.Errorf("cannot build vschema for keyspace %v: %v", sdw.keyspace, err) } } // Compute the overlap keyrange. Later, we'll compare it with // source or destination keyrange. If it matches either, // we'll just ask for all the data. If the overlap is a subset, // we'll filter. overlap, err := key.KeyRangesOverlap(sdw.shardInfo.KeyRange, sdw.shardInfo.SourceShards[sdw.sourceUID].KeyRange) if err != nil { return fmt.Errorf("Source shard doesn't overlap with destination: %v", err) } // run the diffs, 8 at a time sdw.wr.Logger().Infof("Running the diffs...") // TODO(mberlin): Parameterize the hard coded value 8. sem := sync2.NewSemaphore(8, 0) for _, tableDefinition := range sdw.destinationSchemaDefinition.TableDefinitions { wg.Add(1) go func(tableDefinition *tabletmanagerdatapb.TableDefinition) { defer wg.Done() sem.Acquire() defer sem.Release() sdw.wr.Logger().Infof("Starting the diff on table %v", tableDefinition.Name) // On the source, see if we need a full scan // or a filtered scan. var sourceQueryResultReader *QueryResultReader if key.KeyRangeEqual(overlap, sdw.shardInfo.SourceShards[sdw.sourceUID].KeyRange) { sourceQueryResultReader, err = TableScan(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAlias, tableDefinition) } else { sourceQueryResultReader, err = TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.sourceAlias, tableDefinition, overlap, keyspaceSchema, sdw.keyspaceInfo.ShardingColumnName, sdw.keyspaceInfo.ShardingColumnType) } if err != nil { newErr := fmt.Errorf("TableScan(ByKeyRange?)(source) failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf("%v", newErr) return } defer sourceQueryResultReader.Close(ctx) // On the destination, see if we need a full scan // or a filtered scan. var destinationQueryResultReader *QueryResultReader if key.KeyRangeEqual(overlap, sdw.shardInfo.KeyRange) { destinationQueryResultReader, err = TableScan(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition) } else { destinationQueryResultReader, err = TableScanByKeyRange(ctx, sdw.wr.Logger(), sdw.wr.TopoServer(), sdw.destinationAlias, tableDefinition, overlap, keyspaceSchema, sdw.keyspaceInfo.ShardingColumnName, sdw.keyspaceInfo.ShardingColumnType) } if err != nil { newErr := fmt.Errorf("TableScan(ByKeyRange?)(destination) failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf("%v", newErr) return } defer destinationQueryResultReader.Close(ctx) // Create the row differ. differ, err := NewRowDiffer(sourceQueryResultReader, destinationQueryResultReader, tableDefinition) if err != nil { newErr := fmt.Errorf("NewRowDiffer() failed: %v", err) rec.RecordError(newErr) sdw.wr.Logger().Errorf("%v", newErr) return } // And run the diff. report, err := differ.Go(sdw.wr.Logger()) if err != nil { newErr := fmt.Errorf("Differ.Go failed: %v", err.Error()) rec.RecordError(newErr) sdw.wr.Logger().Errorf("%v", newErr) } else { if report.HasDifferences() { err := fmt.Errorf("Table %v has differences: %v", tableDefinition.Name, report.String()) rec.RecordError(err) sdw.wr.Logger().Warningf(err.Error()) } else { sdw.wr.Logger().Infof("Table %v checks out (%v rows processed, %v qps)", tableDefinition.Name, report.processedRows, report.processingQPS) } } }(tableDefinition) } wg.Wait() return rec.Error() }